Read this in other languages: English, 简体中文.
C++ interface: 3 lines of code are all you need to run YOLOX
// create inference engine on gpu-0
// auto engine = Yolo::create_infer("yolov5m.fp32.trtmodel", Yolo::Type::V5, 0);
auto engine = Yolo::create_infer("yolox_m.fp32.trtmodel", Yolo::Type::X, 0);
// load image
auto image = cv::imread("1.jpg");
// do inference and get the result
auto box = engine->commit(image).get();  // returns vector<Box>
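The returned boxes can be consumed directly with OpenCV. A minimal sketch, assuming each Box exposes the same fields as the Python example further below (left, top, right, bottom, class_label):

// illustrative only: draw the returned detections on the input image
for(auto& obj : box){
    cv::rectangle(image,
                  cv::Point((int)obj.left, (int)obj.top),
                  cv::Point((int)obj.right, (int)obj.bottom),
                  cv::Scalar(0, 255, 0), 2);
}
cv::imwrite("1.draw.jpg", image);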
Python interface:

import pytrt as tp
model = models.resnet18(True).eval().to(device)
trt_model = tp.from_torch(model, input)
trt_out = trt_model(input)

import os
import cv2
import numpy as np
import pytrt as tp
engine_file = "yolov5s.fp32.trtmodel"
if not os.path.exists(engine_file):
    tp.compile_onnx_to_file(1, tp.onnx_hub("yolov5s"), engine_file)

yolo = tp.Yolo(engine_file, type=tp.YoloType.V5)
image = cv2.imread("car.jpg")
bboxes = yolo.commit(image).get()
print(f"{len(bboxes)} objects")

for box in bboxes:
    left, top, right, bottom = map(int, [box.left, box.top, box.right, box.bottom])
    cv2.rectangle(image, (left, top), (right, bottom), tp.random_color(box.class_label), 5)

saveto = "yolov5.car.jpg"
print(f"Save to {saveto}")
cv2.imwrite(saveto, image)
cv2.imshow("result", image)
cv2.waitKey()

| Model | Resolution | Type | Precision | Elapsed time (ms) | FPS |
|---|---|---|---|---|---|
| yolox_x | 640x640 | Yolox | FP32 | 21.879 | 45.71 |
| yolox_l | 640x640 | Yolox | FP32 | 12.308 | 81.25 |
| yolox_m | 640x640 | Yolox | FP32 | 6.862 | 145.72 |
| yolox_s | 640x640 | Yolox | FP32 | 3.088 | 323.81 |
| yolox_x | 640x640 | Yolox | FP16 | 6.763 | 147.86 |
| yolox_l | 640x640 | Yolox | FP16 | 3.933 | 254.25 |
| yolox_m | 640x640 | Yolox | FP16 | 2.515 | 397.55 |
| yolox_s | 640x640 | Yolox | FP16 | 1.362 | 734.48 |
| yolox_x | 640x640 | Yolox | INT8 | 4.070 | 245.68 |
| yolox_l | 640x640 | Yolox | INT8 | 2.444 | 409.21 |
| yolox_m | 640x640 | Yolox | INT8 | 1.730 | 577.98 |
| yolox_s | 640x640 | Yolox | INT8 | 1.060 | 943.15 |
| yolov5x6 | 1280x1280 | Yolov5_p6 | FP32 | 68.022 | 14.70 |
| yolov5l6 | 1280x1280 | Yolov5_p6 | FP32 | 37.931 | 26.36 |
| yolov5m6 | 1280x1280 | Yolov5_p6 | FP32 | 20.127 | 49.69 |
| yolov5s6 | 1280x1280 | Yolov5_p6 | FP32 | 8.715 | 114.75 |
| yolov5x | 640x640 | Yolov5_p5 | FP32 | 18.480 | 54.11 |
| yolov5l | 640x640 | Yolov5_p5 | FP32 | 10.110 | 98.91 |
| yolov5m | 640x640 | Yolov5_p5 | FP32 | 5.639 | 177.33 |
| yolov5s | 640x640 | Yolov5_p5 | FP32 | 2.578 | 387.92 |
| yolov5x6 | 1280x1280 | Yolov5_p6 | FP16 | 20.877 | 47.90 |
| yolov5l6 | 1280x1280 | Yolov5_p6 | FP16 | 10.960 | 91.24 |
| yolov5m6 | 1280x1280 | Yolov5_p6 | FP16 | 7.236 | 138.20 |
| yolov5s6 | 1280x1280 | Yolov5_p6 | FP16 | 3.851 | 259.68 |
| yolov5x | 640x640 | Yolov5_p5 | FP16 | 5.933 | 168.55 |
| yolov5l | 640x640 | Yolov5_p5 | FP16 | 3.450 | 289.86 |
| yolov5m | 640x640 | Yolov5_p5 | FP16 | 2.184 | 457.90 |
| yolov5s | 640x640 | Yolov5_p5 | FP16 | 1.307 | 765.10 |
| yolov5x6 | 1280x1280 | Yolov5_p6 | INT8 | 12.207 | 81.92 |
| yolov5l6 | 1280x1280 | Yolov5_p6 | INT8 | 7.221 | 138.49 |
| yolov5m6 | 1280x1280 | Yolov5_p6 | INT8 | 5.248 | 190.55 |
| yolov5s6 | 1280x1280 | Yolov5_p6 | INT8 | 3.149 | 317.54 |
| yolov5x | 640x640 | Yolov5_p5 | INT8 | 3.704 | 269.97 |
| yolov5l | 640x640 | Yolov5_p5 | INT8 | 2.255 | 443.53 |
| yolov5m | 640x640 | Yolov5_p5 | INT8 | 1.674 | 597.40 |
| yolov5s | 640x640 | Yolov5_p5 | INT8 | 1.143 | 874.91 |

| Model | Resolution | Type | Precision | Elapsed time (ms) | FPS |
|---|---|---|---|---|---|
| yolox_x_fast | 640x640 | Yolox | FP32 | 21.598 | 46.30 |
| yolox_l_fast | 640x640 | Yolox | FP32 | 12.199 | 81.97 |
| yolox_m_fast | 640x640 | Yolox | FP32 | 6.819 | 146.65 |
| yolox_s_fast | 640x640 | Yolox | FP32 | 2.979 | 335.73 |
| yolox_x_fast | 640x640 | Yolox | FP16 | 6.764 | 147.84 |
| yolox_l_fast | 640x640 | Yolox | FP16 | 3.866 | 258.64 |
| yolox_m_fast | 640x640 | Yolox | FP16 | 2.386 | 419.16 |
| yolox_s_fast | 640x640 | Yolox | FP16 | 1.259 | 794.36 |
| yolox_x_fast | 640x640 | Yolox | INT8 | 3.918 | 255.26 |
| yolox_l_fast | 640x640 | Yolox | INT8 | 2.292 | 436.38 |
| yolox_m_fast | 640x640 | Yolox | INT8 | 1.589 | 629.49 |
| yolox_s_fast | 640x640 | Yolox | INT8 | 0.954 | 1048.47 |
| yolov5x6_fast | 1280x1280 | Yolov5_p6 | FP32 | 67.075 | 14.91 |
| yolov5l6_fast | 1280x1280 | Yolov5_p6 | FP32 | 37.491 | 26.67 |
| yolov5m6_fast | 1280x1280 | Yolov5_p6 | FP32 | 19.422 | 51.49 |
| yolov5s6_fast | 1280x1280 | Yolov5_p6 | FP32 | 7.900 | 126.57 |
| yolov5x_fast | 640x640 | Yolov5_p5 | FP32 | 18.554 | 53.90 |
| yolov5l_fast | 640x640 | Yolov5_p5 | FP32 | 10.060 | 99.41 |
| yolov5m_fast | 640x640 | Yolov5_p5 | FP32 | 5.500 | 181.82 |
| yolov5s_fast | 640x640 | Yolov5_p5 | FP32 | 2.342 | 427.07 |
| yolov5x6_fast | 1280x1280 | Yolov5_p6 | FP16 | 20.538 | 48.69 |
| yolov5l6_fast | 1280x1280 | Yolov5_p6 | FP16 | 10.404 | 96.12 |
| yolov5m6_fast | 1280x1280 | Yolov5_p6 | FP16 | 6.577 | 152.06 |
| yolov5s6_fast | 1280x1280 | Yolov5_p6 | FP16 | 3.087 | 323.99 |
| yolov5x_fast | 640x640 | Yolov5_p5 | FP16 | 5.919 | 168.95 |
| yolov5l_fast | 640x640 | Yolov5_p5 | FP16 | 3.348 | 298.69 |
| yolov5m_fast | 640x640 | Yolov5_p5 | FP16 | 2.015 | 496.34 |
| yolov5s_fast | 640x640 | Yolov5_p5 | FP16 | 1.087 | 919.63 |
| yolov5x6_fast | 1280x1280 | Yolov5_p6 | INT8 | 11.236 | 89.00 |
| yolov5l6_fast | 1280x1280 | Yolov5_p6 | INT8 | 6.235 | 160.38 |
| yolov5m6_fast | 1280x1280 | Yolov5_p6 | INT8 | 4.311 | 231.97 |
| yolov5s6_fast | 1280x1280 | Yolov5_p6 | INT8 | 2.139 | 467.45 |
| yolov5x_fast | 640x640 | Yolov5_p5 | INT8 | 3.456 | 289.37 |
| yolov5l_fast | 640x640 | Yolov5_p5 | INT8 | 2.019 | 495.41 |
| yolov5m_fast | 640x640 | Yolov5_p5 | INT8 | 1.425 | 701.71 |
| yolov5s_fast | 640x640 | Yolov5_p5 | INT8 | 0.844 | 1185.47 |
-gencode=arch=compute_75,code=sm_75. If you use a 3080Ti, this should be -gencode=arch=compute_86,code=sm_86.

Build with CMake:
mkdir build && cd build
cmake ..
make yolo -j8

Or build with the Makefile:
make yolo -j8

To build the Python interface, set use_python := true in the Makefile, or set(HAS_PYTHON ON) in CMakeLists.txt, then run:
make pyinstall -j8

The compiled library is python/pytrt/libpytrtc.so.

Please check lean/README.md for the detailed dependencies.
In TensorRT.vcxproj, replace <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.props" /> with your own CUDA path.
In TensorRT.vcxproj, replace <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.targets" /> with your own CUDA path.
In TensorRT.vcxproj, replace <CodeGeneration>compute_61,sm_61</CodeGeneration> with your own compute capability.
Configure your dependencies, or download them into the lean folder. Configure the VC++ Directories (Include Directories and References).
Configure your environment under Debugging > Environment.
Compile and run the example, where 3 options are available.
The compiled Python library is python/pytrt/libpytrtc.pyd.

Replace protoc=/data/sxai/lean/protobuf3.11.4/bin/protoc with the protoc of your own version.

# cd the path in terminal to /onnx
cd onnx
# execute the command to make pb files
bash make_pb.sh

For CMake, set set(PROTOBUF_DIR "/data/sxai/lean/protobuf3.11.4") in CMakeLists.txt to the same path as your protoc, then:
mkdir build && cd build
cmake ..
make yolo -j64

For the Makefile, set lean_protobuf := /data/sxai/lean/protobuf3.11.4 in the Makefile to the same protobuf path, then:
make yolo -j64

To switch the onnx parser to your TensorRT version:
bash onnx_parser/use_tensorrt_7.x.sh
make yolo -j64
bash onnx_parser/use_tensorrt_8.x.sh
make yolo -j64

git clone git@github.com:ultralytics/yolov5.git

# line 55 forward function in yolov5/models/yolo.py
# bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
# x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# modified into:
bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
bs = -1
ny = int(ny)
nx = int(nx)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# line 70 in yolov5/models/yolo.py
# z.append(y.view(bs, -1, self.no))
# modified into:
z.append(y.view(bs, self.na * ny * nx, self.no))
############# for yolov5-6.0 #####################
# line 65 in yolov5/models/yolo.py
# if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
# self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
# modified into:
if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
# disconnect for pytorch trace
anchor_grid = (self.anchors[i].clone() * self.stride[i]).view(1, -1, 1, 1, 2)
# line 70 in yolov5/models/yolo.py
# y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
# modified into:
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh
# line 73 in yolov5/models/yolo.py
# wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
# modified into:
wh = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh
############# for yolov5-6.0 #####################
# line 52 in yolov5/export.py
# torch.onnx.export(dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # shape(1,3,640,640)
#                               'output': {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
# modified into:
torch.onnx.export(dynamic_axes={'images': {0: 'batch'},  # shape(1,3,640,640)
                                'output': {0: 'batch'}  # shape(1,25200,85)

cd yolov5
python export.py --weights=yolov5s.pt --dynamic --include=onnx --opset=11

cp yolov5/yolov5s.onnx tensorRT_cpp/workspace/
cd tensorRT_cpp
make yolo -j32

# from cdn
# or wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
wget https://cdn.githubjs.cf/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
git clone git@github.com:WongKinYiu/yolov7.git

# line 45 forward function in yolov7/models/yolo.py
# bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
# x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# modified into:
bs, _, ny, nx = map(int, x[i].shape)  # x(bs,255,20,20) to x(bs,3,20,20,85)
bs = -1
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# line 52 in yolov7/models/yolo.py
# y = x[i].sigmoid()
# y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
# y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
# z.append(y.view(bs, -1, self.no))
# modified into:
y = x[i].sigmoid()
xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, -1, 1, 1, 2)  # wh
classif = y[..., 4:]
y = torch.cat([xy, wh, classif], dim=-1)
z.append(y.view(bs, self.na * ny * nx, self.no))
# line 57 in yolov7/models/yolo.py
# return x if self.training else (torch.cat(z, 1), x)
# modified into:
return x if self.training else torch.cat(z, 1)
# line 52 in yolov7/models/export.py
# output_names=['classes', 'boxes'] if y is None else ['output'],
# dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
# 'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
# modified into:
output_names=['classes', 'boxes'] if y is None else ['output'],
dynamic_axes={'images': {0: 'batch'},  # size(1,3,640,640)
              'output': {0: 'batch'}} if opt.dynamic else None)

cd yolov7
python models/export.py --dynamic --grid --weight=yolov7.pt

cp yolov7/yolov7.onnx tensorRT_cpp/workspace/
cd tensorRT_cpp
make yolo -j32

git clone git@github.com:Megvii-BaseDetection/YOLOX.git
cd YOLOX

A warning such as "Missing scale and zero-point for tensor (Unnamed Layer* 686)" will be raised.

# line 206 forward function in yolox/models/yolo_head.py. Replace the commented code with the uncommented code
# self.hw = [x.shape[-2:] for x in outputs]
self.hw = [list(map(int, x.shape[-2:])) for x in outputs]
# line 208 forward function in yolox/models/yolo_head.py. Replace the commented code with the uncommented code
# [batch, n_anchors_all, 85]
# outputs = torch.cat(
# [x.flatten(start_dim=2) for x in outputs], dim=2
# ).permute(0, 2, 1)
proc_view = lambda x: x.view(-1, int(x.size(1)), int(x.size(2) * x.size(3)))
outputs = torch.cat(
    [proc_view(x) for x in outputs], dim=2
).permute(0, 2, 1)
# line 253 decode_output function in yolox/models/yolo_head.py Replace the commented code with the uncommented code
#outputs[..., :2] = (outputs[..., :2] + grids) * strides
#outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
#return outputs
xy = (outputs[..., :2] + grids) * strides
wh = torch.exp(outputs[..., 2:4]) * strides
return torch.cat((xy, wh, outputs[..., 4:]), dim=-1)
# line 77 in tools/export_onnx.py
model.head.decode_in_inference = True

# download model
wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth
# export
export PYTHONPATH= $PYTHONPATH :.
python tools/export_onnx.py -c yolox_m.pth -f exps/default/yolox_m.py --output-name=yolox_m.onnx --dynamic --no-onnxsim

cp YOLOX/yolox_m.onnx tensorRT_cpp/workspace/
cd tensorRT_cpp
make yolo -j32

git clone git@github.com:ultralytics/yolov3.git

# line 55 forward function in yolov3/models/yolo.py
# bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
# x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# modified into:
bs, _, ny, nx = map(int, x[i].shape)  # x(bs,255,20,20) to x(bs,3,20,20,85)
bs = -1
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
# line 70 in yolov3/models/yolo.py
# z.append(y.view(bs, -1, self.no))
# modified into:
z.append(y.view(bs, self.na * ny * nx, self.no))
# line 62 in yolov3/models/yolo.py
# if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
# self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
# modified into:
if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
anchor_grid = (self.anchors[i].clone() * self.stride[i]).view(1, -1, 1, 1, 2)
# line 70 in yolov3/models/yolo.py
# y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
# modified into:
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh
# line 73 in yolov3/models/yolo.py
# wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
# modified into:
wh = (y[..., 2:4] * 2) ** 2 * anchor_grid  # wh
# line 52 in yolov3/export.py
# torch.onnx.export(dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # shape(1,3,640,640)
# 'output': {0: 'batch', 1: 'anchors'} # shape(1,25200,85)
# modified into:
torch.onnx.export(dynamic_axes={'images': {0: 'batch'},  # shape(1,3,640,640)
                                'output': {0: 'batch'}  # shape(1,25200,85)

cd yolov3
python export.py --weights=yolov3.pt --dynamic --include=onnx --opset=11

cp yolov3/yolov3.onnx tensorRT_cpp/workspace/
cd tensorRT_cpp
# change src/application/app_yolo.cpp: main
# test(Yolo::Type::V3, TRT::Mode::FP32, "yolov3");
make yolo -j32

make dunet -j32
git clone git@github.com:biubug6/Pytorch_Retinaface.git
cd Pytorch_Retinaface

Download the model from the training section of the README at https://github.com/biubug6/pytorch_retinaface#training, then unzip it into the weights folder. Here we use mobilenet0.25_Final.pth.

Modify the code:
# line 24 in models/retinaface.py
# return out.view(out.shape[0], -1, 2) is modified into
return out.view(-1, int(out.size(1) * out.size(2) * 2), 2)
# line 35 in models/retinaface.py
# return out.view(out.shape[0], -1, 4) is modified into
return out.view(-1, int(out.size(1) * out.size(2) * 2), 4)
# line 46 in models/retinaface.py
# return out.view(out.shape[0], -1, 10) is modified into
return out.view(-1, int(out.size(1) * out.size(2) * 2), 10)
# The following modification ensures the output of resize node is based on scale rather than shape such that dynamic batch can be achieved.
# line 89 in models/net.py
# up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") is modified into
up3 = F.interpolate(output3, scale_factor=2, mode="nearest")
# line 93 in models/net.py
# up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") is modified into
up2 = F.interpolate(output2, scale_factor=2, mode="nearest")
# The following code removes softmax (bug sometimes happens). At the same time, concatenate the output to simplify the decoding.
# line 123 in models/retinaface.py
# if self.phase == 'train':
# output = (bbox_regressions, classifications, ldm_regressions)
# else:
# output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
# return output
# the above is modified into:
output = (bbox_regressions, classifications, ldm_regressions)
return torch.cat(output, dim=-1)
# set 'opset_version=11' to ensure a successful export
# torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False,
# input_names=input_names, output_names=output_names)
# is modified into:
torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, opset_version=11,
                               input_names=input_names, output_names=output_names)
python convert_to_onnx.py

cp FaceDetector.onnx ../tensorRT_cpp/workspace/mb_retinaface.onnx
cd ../tensorRT_cpp
make retinaface -j64

make dbface -j64

auto arcface = Arcface::create_infer("arcface_iresnet50.fp32.trtmodel", 0);
auto feature = arcface->commit(make_tuple(face, landmarks)).get();
cout << feature << endl;  // 1x512

workspace/face/library is the set of registered faces.
workspace/face/recognize is the set of faces to recognize.
The results are saved to workspace/face/result and workspace/face/library_draw.
See tutorial/2.0 for the details.
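To match a query face against the registered library, a common approach is cosine similarity between the 1x512 features. A minimal sketch, assuming the features returned by commit() are float cv::Mat rows; the helper below is illustrative and not part of the library API:

#include <opencv2/opencv.hpp>

// Illustrative helper: cosine similarity between two 1x512 float feature rows.
static float cosine_similarity(const cv::Mat& a, const cv::Mat& b){
    double na = cv::norm(a), nb = cv::norm(b);
    return (float)(a.dot(b) / (na * nb + 1e-12));
}

// usage sketch: pick the library entry with the highest similarity
// float best = -1; int best_index = -1;
// for(int i = 0; i < (int)library_features.size(); ++i){
//     float s = cosine_similarity(query_feature, library_features[i]);
//     if(s > best){ best = s; best_index = i; }
// }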
make bert -j6

import pytrt
model = models.resnet18(True).eval()
pytrt.from_torch(
    model,
    dummy_input,
    max_batch_size=16,
    onnx_save_file="test.onnx",
    engine_save_file="engine.trtmodel"
)

import pytrt as tp
yolo = tp.Yolo(engine_file, type=tp.YoloType.X)  # engine_file is the trtmodel file
image = cv2.imread("inference/car.jpg")
bboxes = yolo.commit(image).get()

import pytrt as tp
model = models.resnet18(True).eval().to(device)  # pt model
trt_model = tp.from_torch(model, input)
trt_out = trt_model(input)

// create infer engine on gpu 0
auto engine = Yolo::create_infer("yolox_m.fp32.trtmodel", Yolo::Type::X, 0);
// load image
auto image = cv::imread("1.jpg");
// do inference and get the result
auto box = engine->commit(image).get();

TRT::compile(
    TRT::Mode::FP32,           // compile model in fp32
    3,                         // max batch size
    "plugin.onnx",             // onnx file
    "plugin.fp32.trtmodel",    // save path
    {}                         // redefine the shape of input when needed
);

// define the int8 calibration function that reads data and hands it to the tensor.
auto int8process = [](int current, int count, vector<string>& images, shared_ptr<TRT::Tensor>& tensor){
    for(int i = 0; i < images.size(); ++i){
        // int8 compilation requires calibration. We read the image data and call set_norm_mat.
        // The data will then be transferred into the tensor.
        auto image = cv::imread(images[i]);
        cv::resize(image, image, cv::Size(640, 640));
        float mean[] = {0, 0, 0};
        float std[]  = {1, 1, 1};
        tensor->set_norm_mat(i, image, mean, std);
    }
};
// Specify TRT::Mode as INT8
auto model_file = "yolov5m.int8.trtmodel";
TRT::compile(
    TRT::Mode::INT8,           // INT8
    3,                         // max batch size
    "yolov5m.onnx",            // onnx
    model_file,                // saved filename
    {},                        // redefine the input shape
    int8process,               // the callback function for calibration
    ".",                       // the dir where the image data for calibration is located
    ""                         // the dir where the data generated from calibration is saved (i.e. where to load the calibration data)
);

We introduce the Tensor class for easier inference and easier data transfer between host and device, so that, as a user, you are not bothered by the details.
The Engine class is another facilitator.
// load model and get a shared_ptr. get nullptr if fail to load.
auto engine = TRT::load_infer("yolov5m.fp32.trtmodel");
// print model info
engine->print();
// load image
auto image = imread("demo.jpg");
// get the model input and output node, which can be accessed by name or index
auto input = engine->input(0);   // or auto input = engine->input("images");
auto output = engine->output(0); // or auto output = engine->output("output");
// put the image into the input tensor by calling set_norm_mat()
float mean[] = {0, 0, 0};
float std[]  = {1, 1, 1};
input->set_norm_mat(i, image, mean, std);  // i is the batch index
// do the inference. Here sync(true) or async(false) is optional
engine->forward();  // engine->forward(true or false)
// get the output_ptr, which can be used to access the output
float* output_ptr = output->cpu<float>();
template <>
__global__ void HSwishKernel(float* input, float* output, int edge) {
    KernelPositionBlock;
    float x = input[position];
    float a = x + 3;
    a = a < 0 ? 0 : (a >= 6 ? 6 : a);
    output[position] = x * a / 6;
}

int HSwish::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
    int count = inputs[0].count();
    auto grid  = CUDATools::grid_dims(count);
    auto block = CUDATools::block_dims(count);
    HSwishKernel<<<grid, block, 0, stream>>>(inputs[0].ptr<float>(), outputs[0].ptr<float>(), count);
    return 0;
}

RegisterPlugin(HSwish);