C# 使用 YOLOv8n + ONNX Runtime 实现 AI 监测：从海康威视视频流中实时识别人员并保存标注图片

语轩设计

889人浏览 · 2026-05-05 15:36:11

语轩设计 · 2026-05-05 15:36:11 发布

在实际项目中，经常需要从 RTSP 视频流中抓取一帧，利用深度学习模型检测画面中的人员，并绘制检测框与关键点，最终输出带标注的图片。本文基于 C# 和 .NET WinForms ，将其核心逻辑抽取成一个独立、可复用的方法：接收视频流地址和模型路径，输出带人员框选的图片，完全抛弃世界坐标转换，只关注视觉标注结果。

传入 RTSP 地址（或本地视频文件）和 ONNX 模型路径，程序自动：

从视频流抓取一帧
运行 YOLOv8 行人检测（yolov8n、yolov8l 等任意规格的 ONNX 模型均可）
在图片上绘制矩形框、脚点、置信度
保存标注后的图片到本地

不需要任何坐标计算，即拿即用。

核心依赖

包名	作用
OpenCvSharp4	图像处理、绘制、视频帧读取
Microsoft.ML.OnnxRuntime	运行 ONNX 模型
System.Drawing.Common	辅助（可选；下文代码并未直接使用，坐标点使用的是 OpenCvSharp 的 Point）

安装命令（NuGet）：

Install-Package OpenCvSharp4

Install-Package Microsoft.ML.OnnxRuntime

Install-Package System.Drawing.Common

建议同时安装 OpenCvSharp4.runtime.win 以避免本机依赖问题。

该方法内部封装：

使用 OpenCvSharp 的 VideoCapture 读取一帧（替代原始代码中的 ffmpeg 进程，更简洁）
加载 ONNX 模型并执行推理
预处理（等比缩放、Padding）与后处理（坐标还原、NMS）
绘制矩形框 / 脚点 / 置信度文字
保存

完整实现代码

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;

/// <summary>
/// Grabs one frame from a video source, runs a YOLOv8 ONNX model on it and saves
/// a copy of the frame annotated with every detected person.
/// </summary>
public class YoloPersonDetector
{
    // Fallback square input size. It is only used when the model does not declare
    // fixed spatial dimensions; otherwise the size is read from the session metadata.
    private const int YoloInputSize = 1280;

    // Index of the class that is kept (person is class 0 in the COCO label list).
    private const int PersonClassId = 0;

    // Number of leading output channels that describe the box (cx, cy, w, h).
    private const int BoxChannels = 4;

    // Channel count assumed when the output tensor is not rank 3 (4 box + 80 classes).
    private const int DefaultOutputChannels = 84;

    /// <summary>
    /// Reads a single frame from <paramref name="videoUrl"/>, detects persons and writes
    /// the annotated frame to <paramref name="outputImagePath"/>.
    /// </summary>
    /// <param name="videoUrl">RTSP address or local video file passed to <c>VideoCapture</c>.</param>
    /// <param name="modelPath">Path of the ONNX model file.</param>
    /// <param name="outputImagePath">Destination image path; a missing folder is created.</param>
    /// <param name="confThreshold">Minimum class score for a prediction to be kept.</param>
    /// <param name="nmsThreshold">IoU above which a lower-scored overlapping box is dropped.</param>
    /// <returns>
    /// <c>true</c> when at least one person was found and the image was written;
    /// <c>false</c> on invalid arguments, capture failure, no detection or write failure.
    /// </returns>
    public static bool DetectAndSaveImage(string videoUrl, string modelPath,
        string outputImagePath, float confThreshold = 0.5f, float nmsThreshold = 0.45f)
    {
        if (!File.Exists(modelPath))
        {
            Console.WriteLine($"模型文件不存在: {modelPath}");
            return false;
        }

        // Reject blank arguments up front instead of handing them to native code.
        if (string.IsNullOrWhiteSpace(videoUrl))
        {
            Console.WriteLine("视频流地址为空");
            return false;
        }

        if (string.IsNullOrWhiteSpace(outputImagePath))
        {
            Console.WriteLine("输出图片路径为空");
            return false;
        }

        // 1. Capture one frame.
        using (var cap = new VideoCapture(videoUrl))
        {
            if (!cap.IsOpened())
            {
                Console.WriteLine($"无法打开视频流: {videoUrl}");
                return false;
            }

            using (var frame = new Mat())
            {
                if (!cap.Read(frame) || frame.Empty())
                {
                    Console.WriteLine("读取视频帧失败");
                    return false;
                }

                // 2. Load the ONNX model. The session is rebuilt on every call; callers
                //    that run this repeatedly should cache a session outside.
                using (var session = new InferenceSession(modelPath))
                {
                    // 3. Run detection.
                    var detections = DetectPersons(session, frame, confThreshold, nmsThreshold);
                    if (detections.Count == 0)
                    {
                        Console.WriteLine("未检测到人员");
                        return false;
                    }

                    // 4. Draw on a copy so the captured frame itself stays untouched.
                    using (Mat output = frame.Clone())
                    {
                        DrawDetections(output, detections);

                        // 5. Save, and report a failed write instead of claiming success.
                        if (!TrySaveImage(output, outputImagePath))
                        {
                            Console.WriteLine($"保存图片失败: {outputImagePath}");
                            return false;
                        }
                    }

                    Console.WriteLine($"标注图片已保存: {outputImagePath}");
                    return true;
                }
            }
        }
    }

    /// <summary>
    /// Creates the destination folder when needed and writes the image.
    /// </summary>
    /// <returns><c>false</c> when the folder cannot be created or the write fails.</returns>
    private static bool TrySaveImage(Mat image, string outputImagePath)
    {
        try
        {
            string directory = Path.GetDirectoryName(outputImagePath);
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }
        }
        catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException
            || ex is ArgumentException || ex is NotSupportedException)
        {
            Console.WriteLine($"无法创建输出目录: {ex.Message}");
            return false;
        }

        return Cv2.ImWrite(outputImagePath, image);
    }

    /// <summary>
    /// Returns the model input size as (width, height).
    /// </summary>
    /// <remarks>
    /// Assumes a 4-D NCHW input. When the height or width is not a positive fixed value
    /// (for example a dynamic axis), the square <see cref="YoloInputSize"/> fallback is used.
    /// </remarks>
    private static (int Width, int Height) ResolveInputSize(InferenceSession session, string inputName)
    {
        int[] dims = session.InputMetadata[inputName].Dimensions;
        if (dims != null && dims.Length == 4 && dims[2] > 0 && dims[3] > 0)
        {
            return (dims[3], dims[2]);
        }

        return (YoloInputSize, YoloInputSize);
    }

    /// <summary>
    /// Runs the model on <paramref name="frame"/> and returns person boxes in original
    /// image coordinates together with their scores, after non-maximum suppression.
    /// </summary>
    private static List<(Rect Box, float Confidence)> DetectPersons(
        InferenceSession session, Mat frame, float confThreshold, float nmsThreshold)
    {
        int origW = frame.Width;
        int origH = frame.Height;

        // Use the model's own first input name and size rather than hard-coded values.
        string inputName = session.InputMetadata.Keys.First();
        var (inputW, inputH) = ResolveInputSize(session, inputName);

        // 1. Letterbox: scale keeping aspect ratio, then centre on a grey canvas.
        float scale = Math.Min((float)inputW / origW, (float)inputH / origH);
        int newW = Math.Max(1, (int)(origW * scale));
        int newH = Math.Max(1, (int)(origH * scale));
        int padX = (inputW - newW) / 2;
        int padY = (inputH - newH) / 2;

        int plane = inputW * inputH;
        float[] inputData = new float[3 * plane];

        // Every Mat wraps native memory, so all temporaries are disposed deterministically.
        using (var resized = new Mat())
        using (var padded = new Mat(new Size(inputW, inputH), MatType.CV_8UC3, new Scalar(114, 114, 114)))
        using (var rgb = new Mat())
        {
            Cv2.Resize(frame, resized, new Size(newW, newH));
            using (Mat roi = padded[new Rect(padX, padY, newW, newH)])
            {
                resized.CopyTo(roi);
            }

            // 2. BGR -> RGB, scale to [0, 1] and repack from interleaved to planar (CHW).
            Cv2.CvtColor(padded, rgb, ColorConversionCodes.BGR2RGB);
            for (int y = 0; y < inputH; y++)
            {
                int rowOffset = y * inputW;
                for (int x = 0; x < inputW; x++)
                {
                    Vec3b pixel = rgb.At<Vec3b>(y, x);
                    int offset = rowOffset + x;
                    inputData[offset] = pixel[0] / 255f;
                    inputData[plane + offset] = pixel[1] / 255f;
                    inputData[2 * plane + offset] = pixel[2] / 255f;
                }
            }
        }

        var inputTensor = new DenseTensor<float>(inputData, new[] { 1, 3, inputH, inputW });
        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor(inputName, inputTensor) };

        // 3. Inference.
        using (var results = session.Run(inputs))
        {
            var outputTensor = results.First().AsTensor<float>();
            var output = outputTensor.ToArray();

            // The parsing below expects a channel-major layout [1, 4 + classes, predictions].
            // Sizes come from the tensor shape when it is rank 3, else the 84-channel default.
            int channels;
            int numPredictions;
            if (outputTensor.Rank == 3)
            {
                channels = outputTensor.Dimensions[1];
                numPredictions = outputTensor.Dimensions[2];
            }
            else
            {
                channels = DefaultOutputChannels;
                numPredictions = output.Length / DefaultOutputChannels;
            }

            int classCount = channels - BoxChannels;
            var predictions = new List<YoloPrediction>();
            if (classCount <= PersonClassId || numPredictions <= 0)
            {
                return new List<(Rect Box, float Confidence)>();
            }

            for (int i = 0; i < numPredictions; i++)
            {
                // Find the best-scoring class; the prediction is kept only when that class is person.
                float maxProb = 0;
                int classId = -1;
                for (int c = 0; c < classCount; c++)
                {
                    float prob = output[(BoxChannels + c) * numPredictions + i];
                    if (prob > maxProb)
                    {
                        maxProb = prob;
                        classId = c;
                    }
                }

                if (classId != PersonClassId || maxProb < confThreshold)
                {
                    continue;
                }

                float xCenter = output[i];
                float yCenter = output[numPredictions + i];
                float width = output[2 * numPredictions + i];
                float height = output[3 * numPredictions + i];

                // Undo the letterbox: remove the padding offset, then divide by the scale.
                float x1 = (xCenter - width / 2 - padX) / scale;
                float y1 = (yCenter - height / 2 - padY) / scale;
                float x2 = (xCenter + width / 2 - padX) / scale;
                float y2 = (yCenter + height / 2 - padY) / scale;

                x1 = Math.Clamp(x1, 0, origW);
                y1 = Math.Clamp(y1, 0, origH);
                x2 = Math.Clamp(x2, 0, origW);
                y2 = Math.Clamp(y2, 0, origH);

                int boxW = (int)(x2 - x1);
                int boxH = (int)(y2 - y1);
                if (boxW <= 0 || boxH <= 0)
                {
                    // The box collapsed after clamping to the image, so there is nothing to draw.
                    continue;
                }

                predictions.Add(new YoloPrediction
                {
                    Box = new Rect((int)x1, (int)y1, boxW, boxH),
                    Confidence = maxProb
                });
            }

            // 4. Non-maximum suppression.
            var nmsResult = Nms(predictions, nmsThreshold);
            return nmsResult.Select(p => (p.Box, p.Confidence)).ToList();
        }
    }

    /// <summary>
    /// Greedy non-maximum suppression: visits boxes from highest to lowest score and drops
    /// every later box whose IoU with a kept box exceeds <paramref name="iouThreshold"/>.
    /// The input list is not modified.
    /// </summary>
    private static List<YoloPrediction> Nms(List<YoloPrediction> predictions, float iouThreshold)
    {
        var kept = new List<YoloPrediction>();
        if (predictions.Count == 0)
        {
            return kept;
        }

        var sorted = predictions.OrderByDescending(p => p.Confidence).ToList();
        var suppressed = new bool[sorted.Count];

        for (int i = 0; i < sorted.Count; i++)
        {
            if (suppressed[i])
            {
                continue;
            }

            kept.Add(sorted[i]);
            for (int j = i + 1; j < sorted.Count; j++)
            {
                if (!suppressed[j] && CalculateIou(sorted[i].Box, sorted[j].Box) > iouThreshold)
                {
                    suppressed[j] = true;
                }
            }
        }

        return kept;
    }

    /// <summary>
    /// Intersection-over-union of two rectangles; 0 when the union is empty.
    /// </summary>
    private static float CalculateIou(Rect a, Rect b)
    {
        int x1 = Math.Max(a.X, b.X);
        int y1 = Math.Max(a.Y, b.Y);
        int x2 = Math.Min(a.X + a.Width, b.X + b.Width);
        int y2 = Math.Min(a.Y + a.Height, b.Y + b.Height);

        int interArea = Math.Max(0, x2 - x1) * Math.Max(0, y2 - y1);
        int unionArea = a.Width * a.Height + b.Width * b.Height - interArea;

        // Two zero-area boxes would otherwise give 0/0 = NaN.
        return unionArea > 0 ? (float)interArea / unionArea : 0f;
    }

    /// <summary>
    /// Draws a green box, a red dot at the bottom-centre of the box and a score label
    /// for each detection.
    /// </summary>
    private static void DrawDetections(Mat image, List<(Rect Box, float Confidence)> detections)
    {
        // Lowest baseline at which the label is still drawn inside the image.
        const int MinLabelY = 15;

        foreach (var det in detections)
        {
            // Bounding box (green in BGR).
            Cv2.Rectangle(image, det.Box, new Scalar(0, 255, 0), 2);

            // Foot point: bottom-centre of the box (filled red dot).
            int footX = det.Box.X + det.Box.Width / 2;
            int footY = det.Box.Y + det.Box.Height;
            Cv2.Circle(image, new Point(footX, footY), 5, new Scalar(0, 0, 255), -1);

            // Score label just above the box, pushed down when the box touches the top edge.
            string label = $"person: {det.Confidence:F2}";
            int labelY = Math.Max(det.Box.Y - 5, MinLabelY);
            Cv2.PutText(image, label, new Point(det.Box.X, labelY),
                HersheyFonts.HersheySimplex, 0.6, new Scalar(0, 255, 255), 1);
        }
    }

    /// <summary>
    /// One candidate box with its score, used between parsing and NMS.
    /// </summary>
    private class YoloPrediction
    {
        public Rect Box { get; set; }
        public float Confidence { get; set; }
    }
}

使用示例（控制台或 WinForms）

// Inputs for one detection run: the camera stream, the ONNX model (use your own model file)
// and the path where the annotated frame is written.
var rtspUrl = "rtsp://admin:password@192.168.1.100:554/stream1";
var modelFile = @"C:\models\yolov8n.onnx";
var outputPic = @"D:\detected_person.jpg";

// Thresholds are passed explicitly: confidence 0.5 and NMS IoU 0.45.
bool ok = YoloPersonDetector.DetectAndSaveImage(rtspUrl, modelFile, outputPic, 0.5f, 0.45f);

// A false result covers every failure path as well as "no person in the frame".
Console.WriteLine(ok ? "成功生成带框图片" : "检测失败或无人员");

如果需要集成到 WinForms 的按钮点击（Click）逻辑中，只需将上面的类放入项目，并在按钮点击事件中调用：

/// <summary>
/// Click handler: runs person detection on the stream selected in <c>comboBox1</c>
/// and reports whether an annotated image was saved.
/// </summary>
private void button3Click(object sender, EventArgs e)
{
    // SelectedItem is null when nothing is selected, so the URL must be checked before use.
    string videoUrl = comboBox1.SelectedItem?.ToString();
    if (string.IsNullOrWhiteSpace(videoUrl))
    {
        MessageBox.Show("请先选择视频流地址");
        return;
    }

    string modelPath = Path.Combine(Application.StartupPath, "models", "yolov8l.onnx");

    // Create the output folder up front; writing the image into a missing folder would fail.
    string outputDir = Path.Combine(Application.StartupPath, "TestImages");
    Directory.CreateDirectory(outputDir);
    string outputPath = Path.Combine(outputDir, $"result_{DateTime.Now:yyyyMMdd_HHmmss}.jpg");

    // Surface the outcome instead of discarding the return value.
    bool ok = YoloPersonDetector.DetectAndSaveImage(videoUrl, modelPath, outputPath);
    MessageBox.Show(ok ? $"标注图片已保存: {outputPath}" : "检测失败或无人员");
}

本示例采用 VideoCapture，更加轻量。如果你的 RTSP 流需要指定传输协议（如 TCP），可以在创建 VideoCapture 之前设置环境变量 OPENCV_FFMPEG_CAPTURE_OPTIONS（例如设为 rtsp_transport;tcp），一般情况下默认的自动协商也足够。若遇到连接问题，可回退到原始的 ffmpeg 方案。

⚠️ 注意事项

模型输入尺寸：本文默认 YoloInputSize = 1280。Ultralytics 默认导出的 ONNX 模型输入通常为 640×640，如果你的模型（例如 yolov8l）不是按 1280 导出的，请根据模型的实际输入尺寸修改该常量，否则推理时可能因输入形状不匹配而报错。
ONNX 模型导出：确保模型输出为 [1, 84, N] 这种形状（YOLOv8 标准格式，84 = 4 个框坐标 + 80 个类别）。N 随输入尺寸变化：输入 640×640 时 N = 8400，输入 1280×1280 时 N = 33600。
RTSP 稳定性：生产环境建议增加重连机制，或使用 ffmpeg 解码（如原文那样）以应对复杂流。
性能：每次调用都会重新加载 ONNX 模型（较慢）；如需频繁处理，应将 InferenceSession 缓存为静态字段或单例并重复使用。

AtomGit开源社区

AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念，把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起，为开发者提供从开发、训练到部署的一站式体验。

更多推荐

从翻车到真香：一块DSP模组如何拯救你的语音通话设计

AtomGit开源社区

嵌入式语音通话翻车记：我用一块DSP模组搞定了回音、噪声和远场拾音

AtomGit开源社区

【免费开源】多格式文件转换工具 Pro：图片、PDF、文档、批量重命名一站式转换

摘要：多格式文件转换工具 Pro 是一款免费、本地的 Windows 文件处理工具，支持图片、文档、PDF、音视频等多种格式转换及批量操作。特点包括：完全免费：无会员、登录或付费限制，所有功能永久开放。本地运行：数据通过 SQLite 存储，文件不上传云端，保障隐私安全。多功能支持：图片处理（格式转换、压缩、合并PDF等）；文档互转（Excel/CSV、Markdown/PDF等）；