onnx.__version__
'1.8.0'

1. Prepare the model

import torchvision.models as models
resnet18 = models.resnet18(pretrained=True)
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\KangningCAI/.cache\torch\hub\checkpoints\resnet18-5c106cde.pth
100.0%
import torch
import torchvision

dummy_input = torch.randn(10, 3, 224, 224, device='cuda')
model = torchvision.models.resnet18(pretrained=True).cuda()
print(model(dummy_input))
# Display names can be assigned to the values in the model graph. These names do not
# change the semantics of the graph; they only make it more readable.
# The inputs to the network consist of the flat list of inputs (i.e. the values passed
# to forward()) followed by the flat list of parameters.
# Names may be partially specified: if the list is shorter than the number of graph
# inputs, names are assigned starting from the beginning.
input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ]
output_names = [ "output1" ]

torch.onnx.export(model, dummy_input, "resnet18.onnx", verbose=True,
                  input_names=input_names, output_names=output_names)
tensor([[-1.4664, -1.2065, -0.2031,  ..., -0.3920,  5.7189,  3.1503],
        [-1.5115, -0.8432,  0.7770,  ...,  1.0732, -1.6643,  0.2798],
        [ 0.3694, -1.5528, -1.0725,  ...,  0.9766,  4.3036,  0.3204],
        ...,
        [ 1.1199,  0.8067,  2.0376,  ...,  0.2713, -0.2034,  0.0839],
        [-0.0470,  0.7559, -1.8203,  ..., -0.0480,  1.7802,  1.0056],
        [-1.1093, -2.6424, -1.1345,  ..., -1.4737,  0.6720,  0.4368]],
       device='cuda:0', grad_fn=<AddmmBackward>)
graph(%actual_input_1 : Float(10:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cuda:0),
      %fc.weight : Float(1000:512, 512:1, requires_grad=1, device=cuda:0),
      %fc.bias : Float(1000:1, requires_grad=1, device=cuda:0),
      %193 : Float(64:147, 3:49, 7:7, 7:1, requires_grad=0, device=cuda:0),
      %194 : Float(64:1, requires_grad=0, device=cuda:0),
      %196 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %197 : Float(64:1, requires_grad=0, device=cuda:0),
      %199 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %200 : Float(64:1, requires_grad=0, device=cuda:0),
      %202 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %203 : Float(64:1, requires_grad=0, device=cuda:0),
      %205 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %206 : Float(64:1, requires_grad=0, device=cuda:0),
      %208 : Float(128:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %209 : Float(128:1, requires_grad=0, device=cuda:0),
      %211 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %212 : Float(128:1, requires_grad=0, device=cuda:0),
      %214 : Float(128:64, 64:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %215 : Float(128:1, requires_grad=0, device=cuda:0),
      %217 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %218 : Float(128:1, requires_grad=0, device=cuda:0),
      %220 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %221 : Float(128:1, requires_grad=0, device=cuda:0),
      %223 : Float(256:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %224 : Float(256:1, requires_grad=0, device=cuda:0),
      %226 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %227 : Float(256:1, requires_grad=0, device=cuda:0),
      %229 : Float(256:128, 128:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %230 : Float(256:1, requires_grad=0, device=cuda:0),
      %232 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %233 : Float(256:1, requires_grad=0, device=cuda:0),
      %235 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %236 : Float(256:1, requires_grad=0, device=cuda:0),
      %238 : Float(512:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %239 : Float(512:1, requires_grad=0, device=cuda:0),
      %241 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %242 : Float(512:1, requires_grad=0, device=cuda:0),
      %244 : Float(512:256, 256:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %245 : Float(512:1, requires_grad=0, device=cuda:0),
      %247 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %248 : Float(512:1, requires_grad=0, device=cuda:0),
      %250 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %251 : Float(512:1, requires_grad=0, device=cuda:0)):
  %192 : Float(10:802816, 64:12544, 112:112, 112:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[7, 7], pads=[3, 3, 3, 3], strides=[2, 2]](%actual_input_1, %193, %194)
  %125 : Float(10:802816, 64:12544, 112:112, 112:1, requires_grad=1, device=cuda:0) = onnx::Relu(%192) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %126 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::MaxPool[kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%125) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:586:0
  %195 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%126, %196, %197)
  %129 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%195) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %198 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%129, %199, %200)
  %132 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Add(%198, %126)
  %133 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%132) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %201 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%133, %202, %203)
  %136 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%201) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %204 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%136, %205, %206)
  %139 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Add(%204, %133)
  %140 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%139) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %207 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%140, %208, %209)
  %143 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%207) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %210 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%143, %211, %212)
  %213 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%140, %214, %215)
  %148 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Add(%210, %213)
  %149 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%148) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %216 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%149, %217, %218)
  %152 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%216) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %219 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%152, %220, %221)
  %155 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Add(%219, %149)
  %156 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%155) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %222 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%156, %223, %224)
  %159 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%222) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %225 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%159, %226, %227)
  %228 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%156, %229, %230)
  %164 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Add(%225, %228)
  %165 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%164) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %231 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%165, %232, %233)
  %168 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%231) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %234 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%168, %235, %236)
  %171 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Add(%234, %165)
  %172 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%171) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %237 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%172, %238, %239)
  %175 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%237) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %240 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%175, %241, %242)
  %243 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%172, %244, %245)
  %180 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Add(%240, %243)
  %181 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%180) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %246 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%181, %247, %248)
  %184 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%246) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %249 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%184, %250, %251)
  %187 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Add(%249, %181)
  %188 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%187) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %189 : Float(10:512, 512:1, 1:1, 1:1, requires_grad=1, device=cuda:0) = onnx::GlobalAveragePool(%188) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:936:0
  %190 : Float(10:512, 512:1, requires_grad=1, device=cuda:0) = onnx::Flatten[axis=1](%189) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torchvision\models\resnet.py:214:0
  %output1 : Float(10:1000, 1000:1, requires_grad=1, device=cuda:0) = onnx::Gemm[alpha=1., beta=1., transB=1](%190, %fc.weight, %fc.bias) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1690:0
  return (%output1)
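The export above bakes the batch size of 10 into the graph. If a variable batch size is needed, torch.onnx.export also accepts a dynamic_axes argument; a minimal sketch reusing the model and names from above (the file name resnet18_dynamic.onnx is arbitrary):

# Re-export with a dynamic batch dimension: dynamic_axes maps each named
# input/output to the axes that may vary at runtime.
torch.onnx.export(model, dummy_input, "resnet18_dynamic.onnx",
                  input_names=input_names, output_names=output_names,
                  dynamic_axes={"actual_input_1": {0: "batch_size"},
                                "output1": {0: "batch_size"}})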

2. Run ONNX with the TensorFlow (onnx-tf) backend

import onnx
from onnx_tf.backend import prepare

onnx_model = onnx.load("resnet18.onnx") # load onnx model

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)


# Print a human readable representation of the graph
onnx.helper.printable_graph(onnx_model.graph)
'graph torch-jit-export (\n  %actual_input_1[FLOAT, 10x3x224x224]\n) initializers (\n  %193[FLOAT, 64x3x7x7]\n  %194[FLOAT, 64]\n  %196[FLOAT, 64x64x3x3]\n  %197[FLOAT, 64]\n  %199[FLOAT, 64x64x3x3]\n  %200[FLOAT, 64]\n  %202[FLOAT, 64x64x3x3]\n  %203[FLOAT, 64]\n  %205[FLOAT, 64x64x3x3]\n  %206[FLOAT, 64]\n  %208[FLOAT, 128x64x3x3]\n  %209[FLOAT, 128]\n  %211[FLOAT, 128x128x3x3]\n  %212[FLOAT, 128]\n  %214[FLOAT, 128x64x1x1]\n  %215[FLOAT, 128]\n  %217[FLOAT, 128x128x3x3]\n  %218[FLOAT, 128]\n  %220[FLOAT, 128x128x3x3]\n  %221[FLOAT, 128]\n  %223[FLOAT, 256x128x3x3]\n  %224[FLOAT, 256]\n  %226[FLOAT, 256x256x3x3]\n  %227[FLOAT, 256]\n  %229[FLOAT, 256x128x1x1]\n  %230[FLOAT, 256]\n  %232[FLOAT, 256x256x3x3]\n  %233[FLOAT, 256]\n  %235[FLOAT, 256x256x3x3]\n  %236[FLOAT, 256]\n  %238[FLOAT, 512x256x3x3]\n  %239[FLOAT, 512]\n  %241[FLOAT, 512x512x3x3]\n  %242[FLOAT, 512]\n  %244[FLOAT, 512x256x1x1]\n  %245[FLOAT, 512]\n  %247[FLOAT, 512x512x3x3]\n  %248[FLOAT, 512]\n  %250[FLOAT, 512x512x3x3]\n  %251[FLOAT, 512]\n  %fc.bias[FLOAT, 1000]\n  %fc.weight[FLOAT, 1000x512]\n) {\n  %192 = Conv[dilations = [1, 1], group = 1, kernel_shape = [7, 7], pads = [3, 3, 3, 3], strides = [2, 2]](%actual_input_1, %193, %194)\n  %125 = Relu(%192)\n  %126 = MaxPool[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%125)\n  %195 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%126, %196, %197)\n  %129 = Relu(%195)\n  %198 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%129, %199, %200)\n  %132 = Add(%198, %126)\n  %133 = Relu(%132)\n  %201 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%133, %202, %203)\n  %136 = Relu(%201)\n  %204 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%136, %205, %206)\n  %139 = Add(%204, %133)\n  %140 = Relu(%139)\n  %207 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%140, %208, %209)\n  %143 = Relu(%207)\n  %210 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%143, %211, %212)\n  %213 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%140, %214, %215)\n  %148 = Add(%210, %213)\n  %149 = Relu(%148)\n  %216 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%149, %217, %218)\n  %152 = Relu(%216)\n  %219 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%152, %220, %221)\n  %155 = Add(%219, %149)\n  %156 = Relu(%155)\n  %222 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%156, %223, %224)\n  %159 = Relu(%222)\n  %225 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%159, %226, %227)\n  %228 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%156, %229, %230)\n  %164 = Add(%225, %228)\n  %165 = Relu(%164)\n  %231 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%165, %232, %233)\n  %168 = Relu(%231)\n  %234 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%168, %235, %236)\n  %171 = Add(%234, %165)\n  %172 = Relu(%171)\n  %237 = Conv[dilations = [1, 1], group = 1, 
kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%172, %238, %239)\n  %175 = Relu(%237)\n  %240 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%175, %241, %242)\n  %243 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%172, %244, %245)\n  %180 = Add(%240, %243)\n  %181 = Relu(%180)\n  %246 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%181, %247, %248)\n  %184 = Relu(%246)\n  %249 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%184, %250, %251)\n  %187 = Add(%249, %181)\n  %188 = Relu(%187)\n  %189 = GlobalAveragePool(%188)\n  %190 = Flatten[axis = 1](%189)\n  %output1 = Gemm[alpha = 1, beta = 1, transB = 1](%190, %fc.weight, %fc.bias)\n  return %output1\n}'
output = prepare(onnx_model).run(dummy_input.cpu())  # run the loaded model
output
Outputs(output1=array([[0.5069145 , 2.8980782 , 2.8367603 , ..., 0.2859818 , 0.17711115,
        1.2873001 ],
       [0.46708533, 2.6108694 , 2.5460322 , ..., 0.3299198 , 0.44836068,
        1.4698317 ],
       [0.5995231 , 2.6909895 , 2.4556172 , ..., 0.3942047 , 0.28599155,
        1.350537  ],
       ...,
       [1.0403817 , 3.0089192 , 3.004794  , ..., 0.0915335 , 0.13177788,
        1.5754144 ],
       [0.2126701 , 2.686536  , 2.877485  , ..., 0.20710337, 0.32794827,
        1.4289923 ],
       [0.38755298, 2.7030327 , 2.4524589 , ..., 0.21225189, 0.4668123 ,
        1.3190124 ]], dtype=float32))
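Besides running inference directly, the prepared representation can be saved as a TensorFlow model for later use. A minimal sketch, assuming the export_graph method of onnx-tf's backend representation (in recent TF2-based versions it writes a SavedModel; the path is arbitrary):

tf_rep = prepare(onnx_model)        # onnx-tf backend representation
tf_rep.export_graph("resnet18_tf")  # save the converted graph as a SavedModel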

Running on GPU (operator not supported)

As the traceback below shows, prepare(onnx_model, device="CUDA") fails because the converted MaxPool falls back to TensorFlow's default CPU kernel, which only supports NHWC layout, while the ONNX graph is NCHW.

output = prepare(onnx_model, device="CUDA").run(dummy_input.cpu())  # run the loaded model
print(output)
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-70-d727142059ee> in <module>
----> 1 output = prepare(onnx_model, device="CUDA").run(dummy_input.cpu())  # run the loaded model
      2 print(output)

c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\backend_rep.py in run(self, inputs, **kwargs)
     91     input_dict = dict([(x[0], tf.constant(x[1])) for x in feed_dict.items()])
     92 
---> 93     output_values = self.tf_module(**input_dict)
     94     output_values = [
     95         val.numpy() if isinstance(val, tf.Tensor) else val

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    778       else:
    779         compiler = "nonXla"
--> 780         result = self._call(*args, **kwds)
    781 
    782       new_tracing_count = self._get_tracing_count()

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    844               *args, **kwds)
    845       # If we did not create any variables the trace we have is good enough.
--> 846       return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds)  # pylint: disable=protected-access
    847 
    848     def fn_with_cond(*inner_args, **inner_kwds):

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
   1846                            resource_variable_ops.BaseResourceVariable))],
   1847         captured_inputs=self.captured_inputs,
-> 1848         cancellation_manager=cancellation_manager)
   1849 
   1850   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1922       # No tape is watching; skip to running the function.
   1923       return self._build_call_outputs(self._inference_function.call(
-> 1924           ctx, args, cancellation_manager=cancellation_manager))
   1925     forward_backward = self._select_forward_and_backward_functions(
   1926         args,

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
    548               inputs=args,
    549               attrs=attrs,
--> 550               ctx=ctx)
    551         else:
    552           outputs = execute.execute_with_cancellation(

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     58     ctx.ensure_initialized()
     59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60                                         inputs, attrs, num_outputs)
     61   except core._NotOkStatusException as e:
     62     if name is not None:

InvalidArgumentError:  Default MaxPoolingOp only supports NHWC on device type CPU
	 [[node MaxPool (defined at c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\handlers\backend\dilated_pooling.py:705) ]] [Op:__inference___call___7188]

Errors may have originated from an input operation.
Input Source operations connected to node MaxPool:
 PadV2 (defined at c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\handlers\backend\dilated_pooling.py:517)

Function call stack:
__call__

3. Run ONNX with the onnxruntime-gpu backend

import onnxruntime as ort
sess = ort.InferenceSession("resnet18.onnx")
input_s = sess.get_inputs()
print(input_s)
[<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233B2779378>]
help(ort.InferenceSession)
Help on class InferenceSession in module onnxruntime.capi.onnxruntime_inference_collection:

class InferenceSession(Session)
 |  InferenceSession(path_or_bytes, sess_options=None, providers=None, provider_options=None)
 |  
 |  This is the main class used to run a model.
 |  
 |  Method resolution order:
 |      InferenceSession
 |      Session
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, path_or_bytes, sess_options=None, providers=None, provider_options=None)
 |      :param path_or_bytes: filename or serialized ONNX or ORT format model in a byte string
 |      :param sess_options: session options
 |      :param providers: list of providers to use for session. If empty, will use all available providers.
 |      :param provider_options: list of provider options dict for each provider, in the same order as 'providers'
 |      
 |      The model type will be inferred unless explicitly set in the SessionOptions.
 |      To explicitly set:
 |        so = onnxruntime.SessionOptions()
 |        so.add_session_config_entry('session.load_model_format', 'ONNX') or
 |        so.add_session_config_entry('session.load_model_format', 'ORT') or
 |      
 |      A file extension of '.ort' will be inferred as an ORT format model.
 |      All other filenames are assumed to be ONNX format models.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from Session:
 |  
 |  disable_fallback(self)
 |      Disable session.run() fallback mechanism.
 |  
 |  enable_fallback(self)
 |      Enable session.Run() fallback mechanism. If session.Run() fails due to an internal Execution Provider failure,
 |      reset the Execution Providers enabled for this session.
 |      If GPU is enabled, fall back to CUDAExecutionProvider.
 |      otherwise fall back to CPUExecutionProvider.
 |  
 |  end_profiling(self)
 |      End profiling and return results in a file.
 |      
 |      The results are stored in a filename if the option
 |      :meth:`onnxruntime.SessionOptions.enable_profiling`.
 |  
 |  get_inputs(self)
 |      Return the inputs metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_modelmeta(self)
 |      Return the metadata. See :class:`onnxruntime.ModelMetadata`.
 |  
 |  get_outputs(self)
 |      Return the outputs metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_overridable_initializers(self)
 |      Return the inputs (including initializers) metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_profiling_start_time_ns(self)
 |      Return the nanoseconds of profiling's start time
 |      Comparable to time.monotonic_ns() after Python 3.3
 |      On some platforms, this timer may not be as precise as nanoseconds
 |      For instance, on Windows and MacOS, the precision will be ~100ns
 |  
 |  get_provider_options(self)
 |      Return registered execution providers' configurations.
 |  
 |  get_providers(self)
 |      Return list of registered execution providers.
 |  
 |  get_session_options(self)
 |      Return the session options. See :class:`onnxruntime.SessionOptions`.
 |  
 |  io_binding(self)
 |      Return an onnxruntime.IOBinding object`.
 |  
 |  run(self, output_names, input_feed, run_options=None)
 |      Compute the predictions.
 |      
 |      :param output_names: name of the outputs
 |      :param input_feed: dictionary ``{ input_name: input_value }``
 |      :param run_options: See :class:`onnxruntime.RunOptions`.
 |      
 |      ::
 |      
 |          sess.run([output_name], {input_name: x})
 |  
 |  run_with_iobinding(self, iobinding, run_options=None)
 |      Compute the predictions.
 |      
 |      :param iobinding: the iobinding object that has graph inputs/outputs bind.
 |      :param run_options: See :class:`onnxruntime.RunOptions`.
 |  
 |  set_providers(self, providers, provider_options=None)
 |      Register the input list of execution providers. The underlying session is re-created.
 |      
 |      :param providers: list of execution providers
 |      :param provider_options: list of provider options dict for each provider, in the same order as 'providers'
 |      
 |      The list of providers is ordered by Priority. For example ['CUDAExecutionProvider', 'CPUExecutionProvider']
 |      means execute a node using CUDAExecutionProvider if capable, otherwise execute using CPUExecutionProvider.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from Session:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
print(sess.get_inputs()[0].shape)
sess.get_provider_options()
[10, 3, 224, 224]

{'CUDAExecutionProvider': {'device_id': '0',
  'cuda_mem_limit': '18446744073709551615',
  'arena_extend_strategy': 'kNextPowerOfTwo'},
 'CPUExecutionProvider': {}}
sess.get_providers()
['CUDAExecutionProvider', 'CPUExecutionProvider']
sess.set_providers(["CUDAExecutionProvider"])

inputs = sess.get_inputs()
len_inputs = len(inputs)
print(len_inputs, inputs)
print(inputs[0].name, inputs[0].shape)

outputs = sess.get_outputs()
len_outputs = len(outputs)
print(len_outputs, outputs)
print(outputs[0].name, outputs[0].shape)
1 [<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233B2BD96C0>]
actual_input_1 [10, 3, 224, 224]
1 [<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233AD7EFF48>]
output1 [10, 1000]
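Instead of calling set_providers after construction, the provider list can also be passed when the session is created, as the InferenceSession help above documents:

sess = ort.InferenceSession("resnet18.onnx",
                            providers=["CUDAExecutionProvider", "CPUExecutionProvider"])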

dummy_input = torch.randn(10, 3, 224, 224, device='cuda')

output = sess.run(["output1"], {"actual_input_1": dummy_input.cpu().numpy()})
print(output[0].shape)
print(output[0])
(10, 1000)
[[ 1.4698393   2.0387304   1.7258139  ...  0.83965874  0.5422346
   1.2121105 ]
 [ 1.1619968   2.2490318   2.1027348  ...  0.37480694  0.4261159
   1.2232935 ]
 [ 1.0652378   2.1549475   1.186148   ...  1.0919206   0.6438071
   1.1574954 ]
 ...
 [ 1.1870115   2.4116068   1.9177108  ...  0.27283034 -0.06160793
   1.1494317 ]
 [ 1.6173385   1.9768811   1.3131188  ...  0.78727126  0.57735306
   1.0042048 ]
 [ 1.6731455   2.8796082   2.7225587  ...  0.38563025  0.8073966
   1.1718726 ]]
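sess.run stages inputs and outputs through CPU memory even when the CUDA provider executes the graph. The io_binding/run_with_iobinding methods listed in the help above can keep the output on the GPU until it is actually needed; a minimal sketch (binding method names are an assumption, so verify them against your onnxruntime version):

binding = sess.io_binding()
binding.bind_cpu_input("actual_input_1", dummy_input.cpu().numpy())
binding.bind_output("output1", "cuda")      # leave the result on the GPU
sess.run_with_iobinding(binding)
result = binding.copy_outputs_to_cpu()[0]   # copy back only when needed
print(result.shape)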

4. Run ONNX with the Caffe2 backend

The Caffe2 backend is used for PyTorch deployment by default.
In testing, however, Caffe2's support for ONNX model inference proved unreliable; the code and the resulting error are shown below.

import torch
import onnx
import caffe2.python.onnx.backend
from caffe2.python.onnx.backend import prepare


# Prepare the inputs, here we use numpy to generate some random inputs for demo purpose
import numpy as np
img = np.random.randn(1, 3, 224, 224).astype(np.float32)
"""
# Load the ONNX model
model = onnx.load('assets/squeezenet.onnx')
# Run the ONNX model with Caffe2
outputs = caffe2.python.onnx.backend.run_model(model, [img])
"""
dummy_input = torch.randn(10, 3, 224, 224, device='cuda')
onnx_model = onnx.load("resnet18.onnx") # load onnx model

prepared = prepare(onnx_model, device="CPU")

B = {onnx_model.graph.input[0].name: dummy_input.cpu().numpy()}
print(B)
    
outputs = caffe2.python.onnx.backend.run_model(onnx_model, B, device="CUDA")
help(caffe2.python.onnx.backend)
IndexError                                Traceback (most recent call last)
<ipython-input-61-45a155392781> in <module>
     17 onnx_model = onnx.load("resnet18.onnx") # load onnx model
     18 
---> 19 prepared = prepare(onnx_model, device="CPU")
     20 
     21 B = {onnx_model.graph.input[0].name: dummy_input.cpu().numpy()}

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\caffe2\python\onnx\backend.py in prepare(cls, model, device, raw_values_dict, **kwargs)
    711         device_option = get_device_option(Device(device))
    712 
--> 713         init_net, predict_net = cls._onnx_model_to_caffe2_net(model, device, opset_version, False)
    714 
    715         if raw_values_dict:

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\caffe2\python\onnx\backend.py in _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers)
    874         device_option = get_device_option(Device(device))
    875 
--> 876         onnx_model = onnx.utils.polish_model(onnx_model)
    877         init_model = cls.optimize_onnx(onnx_model, init=True)
    878         pred_model = cls.optimize_onnx(onnx_model, predict=True)

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\onnx\utils.py in polish_model(model)
     19     onnx.helper.strip_doc_string(model)
     20     model = onnx.shape_inference.infer_shapes(model)
---> 21     model = onnx.optimizer.optimize(model)
     22     onnx.checker.check_model(model)
     23     return model

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\onnx\optimizer.py in optimize(model, passes, fixed_point)
     53         optimized_model_str = C.optimize_fixedpoint(model_str, passes)
     54     else:
---> 55         optimized_model_str = C.optimize(model_str, passes)
     56 
     57     return onnx.load_from_string(optimized_model_str)

IndexError: Input 193 is undefined!
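The "Input 193 is undefined!" error comes from onnx.optimizer, which Caffe2's prepare() invokes via onnx.utils.polish_model: newer PyTorch exporters no longer list initializers such as %193 among the graph inputs, while the old optimizer passes expect them there. A workaround commonly suggested for this error (an assumption worth verifying against your versions) is to re-export with keep_initializers_as_inputs=True:

# Sketch: re-export so that initializers also appear as graph inputs,
# which onnx.optimizer expects; the file name is arbitrary.
torch.onnx.export(model, dummy_input, "resnet18_c2.onnx",
                  input_names=input_names, output_names=output_names,
                  keep_initializers_as_inputs=True)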

help( onnx.backend.base.Backend.supports_device)
#onnx.backend.base.Backend.supports_device("NPU")
Help on method supports_device in module onnx.backend.base:

supports_device(device) method of builtins.type instance
    Checks whether the backend is compiled with particular device support.
    In particular it's used in the testing suite.
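supports_device can be called on a concrete backend class to check device support before prepare(); a small sketch against the two backends used in this post (class names taken from each package's backend module, so treat them as assumptions):

from caffe2.python.onnx.backend import Caffe2Backend
from onnx_tf.backend import TensorflowBackend

print(Caffe2Backend.supports_device("CUDA"))       # True only in CUDA builds
print(TensorflowBackend.supports_device("CUDA"))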