偷懒工具1：拒绝手动计算！我用 PySide6 撸了一个 OpenCV ROI 坐标提取“神器”

qq_57341424

488人浏览 · 2026-03-24 13:39:04

qq_57341424 · 2026-03-24 13:39:04 发布

在搞工业视觉或者 OpenCV 开发时，最痛苦的时刻莫过于：为了提取一个 ROI 区域，得先用截图工具量像素，或者在代码里反复修改参数、运行、查看结果。

为了彻底告别“肉眼测距”，我基于 PySide6 开发了一个轻量化的图像标记工具。它能自动处理图像缩放、保持原始像素坐标，并支持矩形和多边形标注。最重要的是，拿到的坐标可以直接丢给 AI（如 Gemini 或 ChatGPT），让它一键生成 OpenCV 裁剪代码，生产力瞬间拉满！

结语

这个小工具虽然简单，但它解决了 CV 工程师每天都要面对的“微小琐事”。如果你也厌倦了手动去 OpenCV 里试坐标，不妨试试这个思路。

代码：

import sys

import os

from PySide6.QtWidgets import (

    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,

    QPushButton, QFileDialog, QLabel, QScrollArea, QMessageBox, QSizePolicy,

    QTextBrowser

)

from PySide6.QtGui import QPixmap, QPainter, QPen, QColor, QBrush, QPolygon

from PySide6.QtCore import Qt, QPoint, QRectF, QEvent




class ImageWidget(QLabel):
    """Canvas that paints an image and records rectangle/polygon annotations.

    Finished annotations are kept in ``self.annotations`` as dictionaries of
    the form ``{'type': 'Rect', 'coords': [x1, y1, x2, y2]}`` or
    ``{'type': 'Polygon', 'coords': [(x, y), ...]}``. All stored coordinates
    are in original-image pixels; ``scale_factor``/``offset_x``/``offset_y``
    describe how the image is mapped into the widget when painting.

    "Is it set?" tests on QPoint/QSize attributes use explicit ``is None``
    comparisons: depending on the binding, a null value such as a point at
    (0, 0) may evaluate as false, which would make truthiness unreliable.
    """

    def __init__(self, parent=None):
        """Create an empty canvas.

        Args:
            parent: Optional parent widget. When present, its background
                role is reused for this widget.
        """
        super().__init__(parent)

        # `parent` defaults to None, so only copy the role when there is one.
        owner = self.parent()
        if owner is not None:
            self.setBackgroundRole(owner.backgroundRole())
        self.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Ignored)
        self.setScaledContents(False)

        # Loaded image and the widget <-> image coordinate mapping.
        self.image_pixmap = QPixmap()
        self.original_size = None  # QSize of the image; None until set_image()
        self.scale_factor = 1.0
        self.offset_x = 0
        self.offset_y = 0

        # Annotation state.
        self.current_tool = "None"  # "None", "Rect" or "Polygon"
        self.annotations = []  # finished annotations (image coordinates)
        self.current_polygon = []  # vertices of the polygon being drawn
        self.temp_rect_start = None  # widget-space QPoint where the drag began
        self.temp_rect_end = None  # widget-space QPoint of the latest drag position

        # Receive move events without a pressed button (polygon preview line).
        self.setMouseTracking(True)

    def _discard_in_progress(self):
        """Drop the unfinished polygon and any rectangle drag."""
        self.current_polygon = []
        self.temp_rect_start = None
        self.temp_rect_end = None

    def set_image(self, pixmap):
        """Show ``pixmap`` and fix the widget size to the pixmap's size.

        Args:
            pixmap: The QPixmap to display.
        """
        self.image_pixmap = pixmap
        self.original_size = pixmap.size()
        self.setFixedSize(self.original_size)
        self.adjust_scale_parameters()
        self.update()

    def set_tool(self, tool_name):
        """Switch the active tool, finishing a polygon in progress first.

        Args:
            tool_name: "None", "Rect" or "Polygon".
        """
        if self.current_tool == "Polygon" and self.current_polygon:
            self.complete_polygon()

        self.current_tool = tool_name
        self._discard_in_progress()
        self.update()

    def clear_annotations(self):
        """Remove every finished annotation and any unfinished shape."""
        self.annotations = []
        self._discard_in_progress()
        self.update()

    def get_original_coords(self, x_screen, y_screen):
        """Map a widget-space position to original-image pixel coordinates.

        Args:
            x_screen: X position inside the widget.
            y_screen: Y position inside the widget.

        Returns:
            ``(x, y)`` as ints, clamped to ``0..width`` and ``0..height`` of
            the image (both bounds inclusive), or ``None`` when no image is
            loaded or the current scale factor is not positive.
        """
        if (self.original_size is None or self.image_pixmap.isNull()
                or self.scale_factor <= 0):
            return None

        x_orig = int((x_screen - self.offset_x) / self.scale_factor)
        y_orig = int((y_screen - self.offset_y) / self.scale_factor)

        x_orig = max(0, min(x_orig, self.original_size.width()))
        y_orig = max(0, min(y_orig, self.original_size.height()))
        return x_orig, y_orig

    def get_screen_point(self, x_orig, y_orig):
        """Map original-image coordinates to a widget-space QPoint."""
        x_screen = int(x_orig * self.scale_factor + self.offset_x)
        y_screen = int(y_orig * self.scale_factor + self.offset_y)
        return QPoint(x_screen, y_screen)

    def adjust_scale_parameters(self):
        """Recompute the scale factor and centering offsets.

        The scale is the largest uniform factor at which the image fits in
        the current widget size; the offsets center the scaled image.
        Does nothing while no image is loaded.
        """
        if self.image_pixmap.isNull():
            return

        w_orig = self.original_size.width()
        h_orig = self.original_size.height()
        w_widget = self.width()
        h_widget = self.height()

        scale_w = w_widget / w_orig if w_orig > 0 else 1.0
        scale_h = h_widget / h_orig if h_orig > 0 else 1.0
        self.scale_factor = min(scale_w, scale_h)

        self.offset_x = (w_widget - w_orig * self.scale_factor) / 2
        self.offset_y = (h_widget - h_orig * self.scale_factor) / 2

    def resizeEvent(self, event):
        """Keep the coordinate mapping in sync with the widget size."""
        super().resizeEvent(event)
        self.adjust_scale_parameters()
        self.update()

    def mousePressEvent(self, event):
        """Start a rectangle drag, add a polygon vertex, or finish a polygon."""
        if self.image_pixmap.isNull() or event.button() != Qt.LeftButton:
            return

        # Qt's default mouseDoubleClickEvent() forwards to mousePressEvent(),
        # so a double-click shows up here with type MouseButtonDblClick.
        if event.type() == QEvent.Type.MouseButtonDblClick and self.current_polygon:
            self.complete_polygon()
            return

        point = event.position().toPoint()
        mapped = self.get_original_coords(point.x(), point.y())
        if mapped is None:  # check before unpacking; None cannot be unpacked
            return
        x_orig, y_orig = mapped

        if self.current_tool == "Rect":
            self.temp_rect_start = point
            self.temp_rect_end = point
        elif self.current_tool == "Polygon":
            self.current_polygon.append((x_orig, y_orig))
            self.update()

    def mouseMoveEvent(self, event):
        """Update the rectangle drag or repaint the polygon preview line."""
        if self.image_pixmap.isNull():
            return

        if self.current_tool == "Rect" and self.temp_rect_start is not None:
            self.temp_rect_end = event.position().toPoint()
            self.update()
        elif self.current_tool == "Polygon" and self.current_polygon:
            self.update()

    def mouseReleaseEvent(self, event):
        """Finish a rectangle drag and store it in image coordinates.

        A drag is discarded when it did not move, or when the resulting
        rectangle has zero width or height in image space (for example
        after both corners were clamped to the same image edge).
        """
        if self.image_pixmap.isNull() or event.button() != Qt.LeftButton:
            return
        if self.current_tool != "Rect" or self.temp_rect_start is None:
            return

        start, end = self.temp_rect_start, self.temp_rect_end
        self.temp_rect_start = None
        self.temp_rect_end = None

        if end is not None and start != end:
            corner_a = self.get_original_coords(start.x(), start.y())
            corner_b = self.get_original_coords(end.x(), end.y())
            if corner_a is not None and corner_b is not None:
                left, right = sorted((corner_a[0], corner_b[0]))
                top, bottom = sorted((corner_a[1], corner_b[1]))
                if right > left and bottom > top:
                    self.annotations.append(
                        {'type': 'Rect', 'coords': [left, top, right, bottom]})

        # Repaint in every case so the dashed preview disappears.
        self.update()

    def complete_polygon(self):
        """Finish the polygon in progress and return to the "None" tool.

        The polygon is stored only if it has at least three vertices;
        otherwise it is dropped. Afterwards the top-level window's
        ``update_tool_buttons`` is called when the window provides it.
        """
        if len(self.current_polygon) >= 3:
            self.annotations.append({'type': 'Polygon', 'coords': self.current_polygon})
        self.current_polygon = []
        self.current_tool = "None"
        self.update()

        # The host window is not guaranteed to expose this hook.
        refresh_buttons = getattr(self.window(), "update_tool_buttons", None)
        if callable(refresh_buttons):
            refresh_buttons()

    def paintEvent(self, event):
        """Paint the image, the finished annotations, and the shape in progress."""
        if self.image_pixmap.isNull():
            return

        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)

        target_rect = QRectF(self.offset_x, self.offset_y,
                             self.original_size.width() * self.scale_factor,
                             self.original_size.height() * self.scale_factor)
        painter.drawPixmap(target_rect.toRect(), self.image_pixmap, self.image_pixmap.rect())

        # Finished annotations: orange outline for rectangles, translucent
        # cyan fill for polygons.
        for ann in self.annotations:
            if ann['type'] == 'Rect':
                x1, y1, x2, y2 = ann['coords']
                p1 = self.get_screen_point(x1, y1)
                p2 = self.get_screen_point(x2, y2)

                painter.setPen(QPen(QColor(255, 100, 0), 3))
                painter.setBrush(Qt.NoBrush)
                painter.drawRect(p1.x(), p1.y(), p2.x() - p1.x(), p2.y() - p1.y())

            elif ann['type'] == 'Polygon':
                poly = QPolygon()
                for x_orig, y_orig in ann['coords']:
                    poly.append(self.get_screen_point(x_orig, y_orig))

                painter.setPen(QPen(QColor(0, 255, 255), 3))
                painter.setBrush(QBrush(QColor(0, 255, 255, 50)))
                painter.drawPolygon(poly)

        # Shape in progress: dashed yellow.
        painter.setPen(QPen(QColor(255, 255, 0), 2, Qt.DashLine))

        if self.current_tool == "Rect" and self.temp_rect_start is not None:
            painter.setBrush(Qt.NoBrush)
            painter.drawRect(self.temp_rect_start.x(), self.temp_rect_start.y(),
                             self.temp_rect_end.x() - self.temp_rect_start.x(),
                             self.temp_rect_end.y() - self.temp_rect_start.y())

        elif self.current_tool == "Polygon" and self.current_polygon:
            poly = QPolygon()
            last_vertex = None
            painter.setBrush(QColor(255, 0, 0))  # red vertex markers
            for x_orig, y_orig in self.current_polygon:
                last_vertex = self.get_screen_point(x_orig, y_orig)
                poly.append(last_vertex)
                painter.drawEllipse(last_vertex, 5, 5)

            painter.setBrush(Qt.NoBrush)
            if len(self.current_polygon) > 1:
                painter.drawPolyline(poly)
            # Preview line from the last vertex to the cursor; drawn from the
            # first vertex onward, matching the repaints in mouseMoveEvent.
            painter.drawLine(last_vertex, self.mapFromGlobal(self.cursor().pos()))

        painter.end()

    def _create_description_box(self):
        """Build the read-only help panel.

        Returns:
            A QTextBrowser showing the usage instructions as HTML, limited
            to a width between 250 and 350 pixels.
        """
        description_text = """

        <h3>🎨 图像标记工具说明</h3>



        <p>本工具用于在加载的图像上进行精确标记，并输出标记点的原始像素坐标。</p>



        <h4>✅ 功能特点:</h4>

        <ul>

            <li><strong>自适应缩放:</strong> 图像等比例适应窗口大小。</li>

            <li><strong>原始坐标输出:</strong> 标记点坐标不受缩放影响，始终输出原始像素值。</li>

            <li><strong>多工具支持:</strong> 矩形框和多边形轮廓。</li>

        </ul>



        <h4>🔨 使用方法:</h4>



        <h5>1. 矩形框标记</h5>

        <p>点击 <strong>矩形框标记</strong> 按钮，然后在图像上：</p>

        <ol>

            <li>按住鼠标左键拖动。</li>

            <li>释放鼠标，完成<strong>橙色</strong>矩形标记。</li>

        </ol>



        <h5>2. 多边形轮廓标记</h5>

        <p>点击 <strong>多边形轮廓标记</strong> 按钮，然后在图像上：</p>

        <ol>

            <li>连续点击鼠标左键来设置轮廓点 (红点)。</li>

            <li>完成绘制后，点击 <strong>完成多边形</strong> 按钮，或在图像上双击鼠标左键。</li>

        </ol>



        <h5>3. 输出与清空</h5>

        <ul>

            <li><strong>清空标记:</strong> 移除当前所有标记。</li>

            <li><strong>输出并导出坐标:</strong> 将所有已完成标记的原始像素坐标保存到 <code>coordinates.txt</code>。</li>

        </ul>

        """

        text_browser = QTextBrowser()
        text_browser.setHtml(description_text)
        text_browser.setMaximumWidth(350)
        text_browser.setMinimumWidth(250)
        return text_browser




class MainWindow(QMainWindow):
    """Main window: image canvas, help panel, tool buttons and export."""

    def __init__(self):
        """Build the layout, the "文件" menu and the tool buttons."""
        super().__init__()
        # The title advertises the Esc shortcut handled in keyPressEvent.
        self.setWindowTitle("图像标记与坐标输出工具 (按 Esc 退出)")
        self.setGeometry(100, 100, 1200, 700)

        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        self.main_layout = QVBoxLayout(self.central_widget)

        # 1. Image and description area (side by side).
        self.content_layout = QHBoxLayout()

        self.image_widget = ImageWidget(self)
        self.scroll_area = QScrollArea()
        self.scroll_area.setWidgetResizable(True)
        self.scroll_area.setWidget(self.image_widget)

        self.description_widget = self.image_widget._create_description_box()

        # Stretch factors 3:1 between the image area and the help panel.
        self.content_layout.addWidget(self.scroll_area, 3)
        self.content_layout.addWidget(self.description_widget, 1)
        self.main_layout.addLayout(self.content_layout)

        # 2. Row of control buttons below the content area.
        self.control_layout = QHBoxLayout()
        self.main_layout.addLayout(self.control_layout)

        self.setup_menu()
        self.setup_buttons()
        self.update_tool_buttons()

    def keyPressEvent(self, event):
        """Close the window on Esc; defer every other key to the base class."""
        if event.key() == Qt.Key_Escape:
            self.close()
        else:
            super().keyPressEvent(event)

    def setup_menu(self):
        """Create the "文件" menu with its "打开图像" action."""
        file_menu = self.menuBar().addMenu("文件")
        open_action = file_menu.addAction("打开图像")
        open_action.triggered.connect(self.open_image)

    def setup_buttons(self):
        """Create the tool, clear and export buttons and wire their handlers."""
        self.btn_rect = QPushButton("矩形框标记")
        self.btn_rect.clicked.connect(lambda: self.set_tool("Rect"))
        self.control_layout.addWidget(self.btn_rect)

        self.btn_polygon = QPushButton("多边形轮廓标记 (点击完成)")
        self.btn_polygon.clicked.connect(lambda: self.set_tool("Polygon"))
        self.control_layout.addWidget(self.btn_polygon)

        self.btn_complete_polygon = QPushButton("完成多边形 (双击鼠标也行)")
        self.btn_complete_polygon.clicked.connect(self.image_widget.complete_polygon)
        self.control_layout.addWidget(self.btn_complete_polygon)

        # Push the remaining buttons to the right-hand side.
        self.control_layout.addStretch(1)

        self.btn_clear = QPushButton("清空标记")
        self.btn_clear.clicked.connect(self.image_widget.clear_annotations)
        self.control_layout.addWidget(self.btn_clear)

        self.btn_export = QPushButton("输出并导出坐标")
        self.btn_export.clicked.connect(self.export_coordinates)
        self.control_layout.addWidget(self.btn_export)

    def update_tool_buttons(self):
        """Highlight the active tool button and toggle the finish button."""
        tool = self.image_widget.current_tool
        active_style = "background-color: lightgreen;"

        self.btn_rect.setStyleSheet(active_style if tool == "Rect" else "")
        self.btn_polygon.setStyleSheet(active_style if tool == "Polygon" else "")

        self.btn_complete_polygon.setEnabled(
            tool == "Polygon" or len(self.image_widget.current_polygon) >= 2)

    def set_tool(self, tool_name):
        """Activate ``tool_name`` on the canvas and refresh the buttons."""
        self.image_widget.set_tool(tool_name)
        self.update_tool_buttons()

    def open_image(self):
        """Ask for an image file and load it into the canvas.

        Shows an error dialog and leaves the current image untouched when
        the chosen file cannot be loaded as a pixmap.
        """
        file_name, _ = QFileDialog.getOpenFileName(self, "打开图像文件", "",
                                                   "Image Files (*.png *.jpg *.jpeg);;All Files (*)")
        if not file_name:
            return

        pixmap = QPixmap(file_name)
        if pixmap.isNull():
            QMessageBox.critical(self, "错误", f"无法加载图像文件，请检查文件路径和格式:\n{file_name}")
            return

        # set_image() already fixes the widget size and recomputes the
        # scale parameters, so neither step is repeated here.
        self.image_widget.set_image(pixmap)
        self.image_widget.clear_annotations()
        self.set_tool("None")
        self.setWindowTitle(f"图像标记与坐标输出工具 - {os.path.basename(file_name)} (按 Esc 退出)")

    def _build_report(self, annotations):
        """Return the text report for ``annotations`` (image coordinates)."""
        size = self.image_widget.original_size
        lines = [f"--- 图像标记原始像素坐标 (W={size.width()}, H={size.height()}) ---"]

        for index, ann in enumerate(annotations, start=1):
            ann_type = ann['type']
            coords = ann['coords']
            lines.append(f"\n标记 {index} ({ann_type}):")

            if ann_type == 'Rect':
                x_min, y_min, x_max, y_max = (int(c) for c in coords)
                lines.append("  类型: 矩形框")
                lines.append(f"  坐标: [{x_min}, {y_min}, {x_max}, {y_max}]")
                lines.append(f"  宽度: {x_max - x_min}, 高度: {y_max - y_min}")

            elif ann_type == 'Polygon':
                lines.append(f"  类型: 多边形/轮廓 (共 {len(coords)} 个点)")
                lines.append("  点列表 (X, Y):")
                lines.extend(
                    f"    P{number}: ({int(x)}, {int(y)})"
                    for number, (x, y) in enumerate(coords, start=1))

        return "\n".join(lines)

    def export_coordinates(self):
        """Print all annotations and write them to ``coordinates.txt``.

        The file is written, UTF-8 encoded, relative to the current working
        directory. A dialog reports success or the I/O error; when there
        are no annotations only an informational dialog is shown.
        """
        annotations = self.image_widget.annotations
        if not annotations:
            QMessageBox.information(self, "提示", "没有检测到任何标记，请先进行标记。")
            return

        output_text = self._build_report(annotations)

        print("\n" + "=" * 50)
        print("--- 坐标输出开始 ---")
        print(output_text)
        print("--- 坐标输出结束 ---")
        print("=" * 50 + "\n")

        output_file = "coordinates.txt"
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(output_text)
        except OSError as e:
            # Only file-system failures are expected here; anything else is
            # a programming error and should not be hidden behind a dialog.
            QMessageBox.critical(self, "导出错误", f"导出文件失败: {e}")
        else:
            QMessageBox.information(self, "导出成功", f"坐标已成功导出到:\n{os.path.abspath(output_file)}")




if __name__ == '__main__':
    # Script entry point: create the Qt application, show the main window,
    # and hand the event loop's return code back to the shell.
    qt_app = QApplication(sys.argv)

    main_window = MainWindow()
    main_window.show()

    exit_code = qt_app.exec()
    sys.exit(exit_code)

备注：如果只是想直接使用，我已经把工具打包好并通过网盘分享：ROI原图标记工具.zip
链接: https://pan.baidu.com/s/1pQcRiRstkLDKImDTfo1L4Q?pwd=3h2e 提取码: 3h2e
--来自百度网盘超级会员v5的分享