问题一:中文路径及标签问题

在用labelme2yolo工具时遇到报错:

因为路径中的中文字符 被编码成了乱码,导致 cv2.imread 无法打开文件。

[ WARN:0@7.379] global loadsave.cpp:275 cv::findDecoder imread_('D:\longzhi\0506\3\0\YOLODataset/images/train/渚1.png'): can't open/read file: check file path/integrity
Traceback (most recent call last):
  File "D:\Labelme2YOLO\labelme2yolo.py", line 276, in <module>
    convertor.convert(val_size=args.val_size)
  File "D:\Labelme2YOLO\labelme2yolo.py", line 107, in convert
    yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
  File "D:\Labelme2YOLO\labelme2yolo.py", line 132, in _get_yolo_object_list
    img_h, img_w, _ = cv2.imread(img_path).shape
AttributeError: 'NoneType' object has no attribute 'shape'

步骤1

在 class Labelme2YOLO 中的 __init__ 方法之后,添加以下辅助方法:

def _imread_chinese(self, img_path):
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
    if img is None:
        print(f"警告:无法读取图片,请检查路径或文件完整性 - {img_path}")
    return img

步骤2

Ctrl+F 找到  _get_yolo_object_list 方法,将其代码改为:

def _get_yolo_object_list(self, json_data, img_path):
    yolo_obj_list = []

    img = self._imread_chinese(img_path)
    if img is None:
        return yolo_obj_list     # 如果图片读取失败,返回空列表(或根据需求抛出异常)
    img_h, img_w, _ = img.shape
    
    for shape in json_data['shapes']:
        # ... 后续代码保持不变

问题二:自动生成预测集问题

因为我输入  python labelme2yolo.py --json_dir D:\longzhi\0506\65\1 

自动生成了预测集,我不想要预测集,就改成

        python labelme2yolo.py --json_dir D:\longzhi\0506\65\1 --val_size 0

结果显示要求 test_size 必须在 (0.0, 1.0) 或 >=1 的整数范围内,不能为 0,报错:

Traceback (most recent call last):
  File "D:\Labelme2YOLO\labelme2yolo.py", line 284, in <module>
    convertor.convert(val_size=args.val_size)
  File "D:\Labelme2YOLO\labelme2yolo.py", line 93, in convert
    train_json_names, val_json_names = self._train_test_split(folders, json_names, val_size)
  File "D:\Labelme2YOLO\labelme2yolo.py", line 80, in _train_test_split
    train_idxs, val_idxs = train_test_split(range(len(json_names)),
  File "C:\Users\Administrator\AppData\Roaming\Python\Python39\site-packages\sklearn\utils\_param_validation.py", line 206, in wrapper
    validate_parameter_constraints(
  File "C:\Users\Administrator\AppData\Roaming\Python\Python39\site-packages\sklearn\utils\_param_validation.py", line 98, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidParameterError: The 'test_size' parameter of train_test_split must be a float in the range (0.0, 1.0), an int in the range [1, inf) or None. Got 0.0 instead.

步骤1

Ctrl+F 找到 _train_test_split 函数(大约在第 64~87 行)。

def _train_test_split(self, folders, json_names, val_size):
    # 如果已存在 train/val 子目录,优先使用
    if len(folders) > 0 and 'train' in folders and 'val' in folders:
        train_folder = os.path.join(self._json_dir, 'train/')
        train_json_names = [train_sample_name + '.json' \
                            for train_sample_name in os.listdir(train_folder) \
                            if os.path.isdir(os.path.join(train_folder, train_sample_name))]
        
        val_folder = os.path.join(self._json_dir, 'val/')
        val_json_names = [val_sample_name + '.json' \
                          for val_sample_name in os.listdir(val_folder) \
                          if os.path.isdir(os.path.join(val_folder, val_sample_name))]
        
        return train_json_names, val_json_names
    
    # 如果 val_size == 0,则全部用于训练,验证集为空
    if val_size == 0:
        return json_names, []
    
    # 正常划分
    train_idxs, val_idxs = train_test_split(range(len(json_names)), 
                                            test_size=val_size)
    train_json_names = [json_names[train_idx] for train_idx in train_idxs]
    val_json_names = [json_names[val_idx] for val_idx in val_idxs]
    
    return train_json_names, val_json_names

如果懒得改,可以直接复制以下完整程序替换即可:

'''
Created on Aug 18, 2021

@author: xiaosonh
'''
import os
import sys
import argparse
import shutil
import math
from collections import OrderedDict

import json
import cv2
import PIL.Image
  
from sklearn.model_selection import train_test_split
from labelme import utils

import numpy as np
import cv2

class Labelme2YOLO(object):
    
    def __init__(self, json_dir, to_seg=False):
        self._json_dir = json_dir
        
        self._label_id_map = self._get_label_id_map(self._json_dir)
        self._to_seg = to_seg

        i = 'YOLODataset'
        i += '_seg/' if to_seg else '/'
        self._save_path_pfx = os.path.join(self._json_dir, i)
    def _imread_chinese(self, img_path):
        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
        if img is None:
            print(f"警告:无法读取图片,请检查路径或文件完整性 - {img_path}")
        return img
    
    def _make_train_val_dir(self):
        self._label_dir_path = os.path.join(self._save_path_pfx, 'labels/')
        self._image_dir_path = os.path.join(self._save_path_pfx, 'images/')

        for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
                          os.path.join(self._label_dir_path + 'val/'),
                          os.path.join(self._image_dir_path + 'train/'), 
                          os.path.join(self._image_dir_path + 'val/')):
            if os.path.exists(yolo_path):
                shutil.rmtree(yolo_path)
            
            os.makedirs(yolo_path)    
                
    def _get_label_id_map(self, json_dir):
        label_set = set()
    
        for file_name in os.listdir(json_dir):
            if file_name.endswith('json'):
                json_path = os.path.join(json_dir, file_name)
                data = json.load(open(json_path, encoding='utf-8'))
                for shape in data['shapes']:
                    label_set.add(shape['label'])
        
        return OrderedDict([(label, label_id) \
                            for label_id, label in enumerate(label_set)])
    
    def _train_test_split(self, folders, json_names, val_size):
        # 如果已存在 train/val 子目录,优先使用
        if len(folders) > 0 and 'train' in folders and 'val' in folders:
            train_folder = os.path.join(self._json_dir, 'train/')
            train_json_names = [train_sample_name + '.json' \
                                for train_sample_name in os.listdir(train_folder) \
                                if os.path.isdir(os.path.join(train_folder, train_sample_name))]
            
            val_folder = os.path.join(self._json_dir, 'val/')
            val_json_names = [val_sample_name + '.json' \
                            for val_sample_name in os.listdir(val_folder) \
                            if os.path.isdir(os.path.join(val_folder, val_sample_name))]
            
            return train_json_names, val_json_names
        
        # 如果 val_size == 0,则全部用于训练,验证集为空
        if val_size == 0:
            return json_names, []
        
        # 正常划分
        train_idxs, val_idxs = train_test_split(range(len(json_names)), 
                                                test_size=val_size)
        train_json_names = [json_names[train_idx] for train_idx in train_idxs]
        val_json_names = [json_names[val_idx] for val_idx in val_idxs]
        
        return train_json_names, val_json_names
    
    def convert(self, val_size):
        json_names = [file_name for file_name in os.listdir(self._json_dir) \
                      if os.path.isfile(os.path.join(self._json_dir, file_name)) and \
                      file_name.endswith('.json')]
        folders =  [file_name for file_name in os.listdir(self._json_dir) \
                    if os.path.isdir(os.path.join(self._json_dir, file_name))]
        train_json_names, val_json_names = self._train_test_split(folders, json_names, val_size)
        
        self._make_train_val_dir()
    
        # convert labelme object to yolo format object, and save them to files
        # also get image from labelme json file and save them under images folder
        for target_dir, json_names in zip(('train/', 'val/'), 
                                          (train_json_names, val_json_names)):
            for json_name in json_names:
                json_path = os.path.join(self._json_dir, json_name)
                json_data = json.load(open(json_path, encoding='utf-8'))
                
                print('Converting %s for %s ...' % (json_name, target_dir.replace('/', '')))
                
                img_path = self._save_yolo_image(json_data, 
                                                 json_name, 
                                                 self._image_dir_path, 
                                                 target_dir)
                    
                yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
                self._save_yolo_label(json_name, 
                                      self._label_dir_path, 
                                      target_dir, 
                                      yolo_obj_list)
        
        print('Generating dataset.yaml file ...')
        self._save_dataset_yaml()
                
    def convert_one(self, json_name):
        json_path = os.path.join(self._json_dir, json_name)
        json_data = json.load(open(json_path))
        
        print('Converting %s ...' % json_name)
        
        img_path = self._save_yolo_image(json_data, json_name, 
                                         self._json_dir, '')
        
        yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
        self._save_yolo_label(json_name, self._json_dir, 
                              '', yolo_obj_list)
    
    def _get_yolo_object_list(self, json_data, img_path):
        yolo_obj_list = []
        # 修改这一行:使用新的中文路径读取函数
        img = self._imread_chinese(img_path)
        if img is None:
            return yolo_obj_list     # 如果图片读取失败,返回空列表(或根据需求抛出异常)
        img_h, img_w, _ = img.shape
        for shape in json_data['shapes']:
            # labelme circle shape is different from others
            # it only has 2 points, 1st is circle center, 2nd is drag end point
            if shape['shape_type'] == 'circle':
                yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
            else:
                yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
            
            yolo_obj_list.append(yolo_obj)
            
        return yolo_obj_list
    
    def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
        label_id = self._label_id_map[shape['label']]
        obj_center_x, obj_center_y = shape['points'][0]

        radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
                           (obj_center_y - shape['points'][1][1]) ** 2)

        if self._to_seg:
            retval = [label_id]

            n_part = radius / 10
            n_part = int(n_part) if n_part > 4 else 4
            n_part2 = n_part << 1

            pt_quad = [None for i in range(0, 4)]
            pt_quad[0] = [[obj_center_x + math.cos(i * math.pi / n_part2) * radius,
                         obj_center_y - math.sin(i * math.pi / n_part2) * radius]
                         for i in range(1, n_part)]
            pt_quad[1] = [[obj_center_x * 2 - x1, y1] for x1, y1 in pt_quad[0]]
            pt_quad[1].reverse()
            pt_quad[3] = [[x1, obj_center_y * 2 - y1] for x1, y1 in pt_quad[0]]
            pt_quad[3].reverse()
            pt_quad[2] = [[obj_center_x * 2 - x1, y1] for x1, y1 in pt_quad[3]]
            pt_quad[2].reverse()

            pt_quad[0].append([obj_center_x, obj_center_y - radius])
            pt_quad[1].append([obj_center_x - radius, obj_center_y])
            pt_quad[2].append([obj_center_x, obj_center_y + radius])
            pt_quad[3].append([obj_center_x + radius, obj_center_y])

            for i in pt_quad:
                for j in i:
                    j[0] = round(float(j[0]) / img_w, 6)
                    j[1] = round(float(j[1]) / img_h, 6)
                    retval.extend(j)
            return retval

        obj_w = 2 * radius
        obj_h = 2 * radius
        
        yolo_center_x= round(float(obj_center_x / img_w), 6)
        yolo_center_y = round(float(obj_center_y / img_h), 6)
        yolo_w = round(float(obj_w / img_w), 6)
        yolo_h = round(float(obj_h / img_h), 6)

        return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h

    def _get_other_shape_yolo_object(self, shape, img_h, img_w):
        label_id = self._label_id_map[shape['label']]

        if self._to_seg:
            retval = [label_id]
            for i in shape['points']:
                i[0] = round(float(i[0]) / img_w, 6)
                i[1] = round(float(i[1]) / img_h, 6)
                retval.extend(i)
            return retval

        def __get_object_desc(obj_port_list):
            __get_dist = lambda int_list: max(int_list) - min(int_list)
            
            x_lists = [port[0] for port in obj_port_list]        
            y_lists = [port[1] for port in obj_port_list]
            
            return min(x_lists), __get_dist(x_lists), min(y_lists), __get_dist(y_lists)
        
        obj_x_min, obj_w, obj_y_min, obj_h = __get_object_desc(shape['points'])
                    
        yolo_center_x= round(float((obj_x_min + obj_w / 2.0) / img_w), 6)
        yolo_center_y = round(float((obj_y_min + obj_h / 2.0) / img_h), 6)
        yolo_w = round(float(obj_w / img_w), 6)
        yolo_h = round(float(obj_h / img_h), 6)

        return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
    
    def _save_yolo_label(self, json_name, label_dir_path, target_dir, yolo_obj_list):
        txt_path = os.path.join(label_dir_path, 
                                target_dir, 
                                json_name.replace('.json', '.txt'))

        with open(txt_path, 'w+') as f:
            for yolo_obj_idx, yolo_obj in enumerate(yolo_obj_list):
                yolo_obj_line = ""
                for i in yolo_obj:
                    yolo_obj_line += f'{i} '
                yolo_obj_line = yolo_obj_line[:-1]
                if yolo_obj_idx != len(yolo_obj_list) - 1:
                    yolo_obj_line += '\n'
                f.write(yolo_obj_line)

    def _save_yolo_image(self, json_data, json_name, image_dir_path, target_dir):
        img_name = json_name.replace('.json', '.png')
        img_path = os.path.join(image_dir_path, target_dir,img_name)
        
        if not os.path.exists(img_path):
            img = utils.img_b64_to_arr(json_data['imageData'])
            PIL.Image.fromarray(img).save(img_path)
        
        return img_path
    
    def _save_dataset_yaml(self):
        yaml_path = os.path.join(self._save_path_pfx, 'dataset.yaml')

        with open(yaml_path, 'w+') as yaml_file:
            yaml_file.write('train: %s\n' % \
                            os.path.join(self._image_dir_path, 'train/'))
            yaml_file.write('val: %s\n\n' % \
                            os.path.join(self._image_dir_path, 'val/'))
            yaml_file.write('nc: %i\n\n' % len(self._label_id_map))
            
            names_str = ''
            for label, _ in self._label_id_map.items():
                names_str += "'%s', " % label
            names_str = names_str.rstrip(', ')
            yaml_file.write('names: [%s]' % names_str)
    

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--json_dir',type=str,
                        help='Please input the path of the labelme json files.')
    parser.add_argument('--val_size',type=float, nargs='?', default=0.1,
                        help='Please input the validation dataset size, for example 0.1 ')
    parser.add_argument('--json_name',type=str, nargs='?', default=None,
                        help='If you put json name, it would convert only one json file to YOLO.')
    parser.add_argument('--seg', action='store_true',
                        help='Convert to YOLOv5 v7.0 segmentation dataset')
    args = parser.parse_args(sys.argv[1:])

    convertor = Labelme2YOLO(args.json_dir, to_seg=args.seg)
    if args.json_name is None:
        convertor.convert(val_size=args.val_size)
    else:
        convertor.convert_one(args.json_name)

使用参考:Labelme2YOLO:轻松将LabelMe标注转换为YOLO格式的3种方法-CSDN博客

Logo

AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。

更多推荐