目标检测数据集格式转换：将labelme格式转为YOLO以及VOC格式

一个目标检测项目需要自己找图片标注数据进行训练，训练需要YOLO格式，但数据增广需要VOC格式，该文记录如何将labelme标注的数据格式转为YOLO格式，再从YOLO格式转为VOC格式，只作为自己用的记录，如果你刚好也需要这么干，或者需要文中提到的某一种转换，也可以参考一下。文中有些代码是参考其他地方的，时间长已经记不清了，如有侵权请联系更改。注意：路径不要有中文，标签也用相应的英文。

文章共8,832字 · 阅读需要大约30分钟

一键AI生成摘要，助你高效阅读

问答

大康1999

8471人浏览 · 2022-11-02 09:37:27

大康1999 · 2022-11-02 09:37:27 发布

前言

一个目标检测项目需要自己找图片标注数据进行训练，训练需要YOLO格式，但数据增广需要VOC格式，该文记录如何将labelme标注的数据格式转为YOLO格式，再从YOLO格式转为VOC格式，只作为自己用的记录，如果你刚好也需要这么干，或者需要文中提到的某一种转换，也可以参考一下。文中有些代码是参考其他地方的，时间长已经记不清了，如有侵权请联系更改。
注意：路径不要有中文，标签也用相应的英文

第一步：将图片和标签分为两个单独的文件夹

手动完成即可，标签的文件夹最好加个json后缀，因为后面会有其他格式的标签文件。
在这里插入图片描述

第二步：将jpeg、png等格式都改为jpg格式

因为搜集的图片什么格式都有，为了方便训练，统一为jpg格式。

代码如下：

# trans_others_to_jpg.py

import os
import cv2 as cv
 
image_path = 'D:/DeskTop/Datasets/clothes/images/'    # directory the source images are read from
save_path = 'D:/DeskTop/Datasets/clothes/images_jpg/'    # separate output directory, so files in other formats are left behind

# Extensions that are re-encoded as .jpg; compared case-insensitively.
SUPPORTED_EXTENSIONS = ('.bmp', '.jpg', '.jpeg', '.png')

# Create the output directory if it does not exist yet.
os.makedirs(save_path, exist_ok=True)

image_file = os.listdir(image_path)
for image in image_file:
    # Split on the last dot only, e.g. "112345.jpeg" -> ("112345", ".jpeg").
    stem, extension = os.path.splitext(image)
    if extension.lower() not in SUPPORTED_EXTENSIONS:
        continue
    output_img_name = stem + ".jpg"
    source_file = os.path.join(image_path, image)
    src = cv.imread(source_file)
    if src is None:
        # cv.imread returns None instead of raising when a file cannot be decoded;
        # passing None on to cv.imwrite would abort the whole run.
        print('SKIPPED (cannot read):', source_file)
        continue
    cv.imwrite(os.path.join(save_path, output_img_name), src)
print('FINISHED')

第三步：重命名图片和标签

将图片和对应的标签重命名为六位数的名字，从000001开始编号。注意：图片和标签都需要进行重命名（分别对两个文件夹各运行一次）

代码如下：

# rename.py

import os

path = "D:/DeskTop/Datasets/clothes/label_json/"    # directory whose files are renamed (here: the json labels)

# Sort by (stem, extension) so the numbering is deterministic. os.listdir returns
# names in arbitrary order, and the image folder and the label folder must be
# numbered in the same order for image N to keep matching label N.
filelist = sorted(os.listdir(path), key=os.path.splitext)
for file in filelist:
    print(file)

# Phase 1: move every file to a unique temporary name. Renaming straight to the
# final name can hit a file that already carries that number (e.g. when the
# script is re-run), which raises on Windows and silently overwrites on POSIX.
staged = []
for file in filelist:
    Olddir = os.path.join(path, file)
    if os.path.isdir(Olddir):
        continue
    filetype = os.path.splitext(file)[1]
    Tmpdir = os.path.join(path, "__renaming__" + str(len(staged)) + "__" + file)
    os.rename(Olddir, Tmpdir)
    staged.append((Tmpdir, filetype))

# Phase 2: assign the final six-digit names, starting at 000001.
count = 1
for Tmpdir, filetype in staged:
    Newdir = os.path.join(path, str(count).zfill(6) + filetype)     # zfill(6): pad the number to six digits
    os.rename(Tmpdir, Newdir)
    count += 1

第四步：修改json中的imagePath

因为上一步只改变了名字，标签内的imagePath并没有跟着变，所以还要改一下，和图片对应起来，其实这一步不做也没事，因为YOLO格式就是根据标签文件名读取图片路径的，为了以后可能需要json的标签，还是改一下最好。

代码如下：

# change_json_imagePath.py

import json
import os
import re
 
path = 'D:/DeskTop/Datasets/clothes/label_json/'  # directory holding the labelme json files
dirs = os.listdir(path)

num_flag = 0  # number of json files processed
for file in dirs:
    filename, extension = os.path.splitext(file)  # e.g. "000001.json" -> ("000001", ".json")
    if extension != ".json":
        continue
    num_flag = num_flag + 1
    print("path = ", file)  # the json file name, which was renamed to match its image
    json_file = os.path.join(path, file)
    # Open explicitly as UTF-8: the platform default (e.g. GBK on Chinese Windows)
    # can fail on non-ASCII content.
    with open(json_file, 'r', encoding='utf-8') as load_f:
        load_dict = json.load(load_f)  # whole file parsed into a dict
    n = len(load_dict)  # number of top-level keys in the json object
    print('n = ', n)
    # imagePath is a top-level key in these files; adapt the lookup if yours are nested differently.
    print("imagePath = ", load_dict.get('imagePath'))

    print("filename = ", filename)
    load_dict['imagePath'] = filename + '.jpg'  # point the label at the image of the same name
    print("new imagePath = ", load_dict['imagePath'])

    with open(json_file, 'w', encoding='utf-8') as dump_f:
        json.dump(load_dict, dump_f)

if num_flag == 0:
    print('所选文件夹不存在json文件，请重新确认要选择的文件夹')
else:
    print('共{}个json文件'.format(num_flag))

第五步：将labelme格式转为YOLO格式

将labelme的json格式转为YOLO的txt格式，同样保存txt标签的文件夹最好也加个后缀，方便和json区分。注意把代码中的 id2cls 和 cls2id 两个字典改为自己数据集的类别，类别编号从0开始

代码如下：

# trans_labelme_to_yolo.py

import cv2
import os
import json
import shutil
import numpy as np
from pathlib import Path
from glob import glob
 
# Class table: YOLO class id -> labelme label name. Edit for your own dataset; ids start at 0.
id2cls = {0: 'clothing'}
# Reverse lookup (label name -> class id), derived from id2cls so the two tables cannot drift apart.
cls2id = {name: idx for idx, name in id2cls.items()}
 
# Image reader that supports paths containing Chinese characters
def cv_imread(filePath):
    """Load the file at ``filePath`` into a uint8 buffer and decode it as a colour image.

    Returns whatever ``cv2.imdecode`` returns for that buffer.
    """
    raw_bytes = np.fromfile(filePath, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, flags=cv2.IMREAD_COLOR)
 
def labelme2yolo_single(img_path, label_file, class_map=None):
    """Convert one labelme json file into YOLO-style rows.

    Args:
        img_path: prefix prepended to the json's ``imagePath`` before the
            base name is taken.
        label_file: path of the labelme json file.
        class_map: mapping from label name to class id. Defaults to the
            module-level ``cls2id``.

    Returns:
        ``(labels, image_name)``. ``labels`` is a numpy array with one row
        ``[class_id, x_center, y_center, width, height]`` per shape, all four
        box values normalised by the image size stored in the json.
        ``image_name`` is the base name of ``img_path + imagePath``.

    Raises:
        KeyError: if a shape's label is not in ``class_map``.
    """
    if class_map is None:
        class_map = cls2id
    # Context manager so the handle is closed even if parsing fails.
    with open(label_file, "r", encoding="utf-8") as fh:
        anno = json.load(fh)
    w0, h0 = anno['imageWidth'], anno['imageHeight']
    image_path = os.path.basename(img_path + anno['imagePath'])
    labels = []
    for s in anno['shapes']:
        pts = s['points']
        if len(pts) < 2:
            # A single point does not define a box.
            continue
        # Use the extent of ALL points: identical to the two-corner case for
        # rectangles, and gives the enclosing box for polygons.
        xs = [pt[0] for pt in pts]
        ys = [pt[1] for pt in pts]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        x = (x_min + x_max) / 2 / w0
        y = (y_min + y_max) / 2 / h0
        w = (x_max - x_min) / w0
        h = (y_max - y_min) / h0
        cid = class_map[s['label']]
        labels.append([cid, x, y, w, h])
    return np.array(labels), image_path
 
def labelme2yolo(img_path, labelme_label_dir, save_dir='res/'):
    """Convert every labelme json file in a directory to a YOLO txt label file.

    Args:
        img_path: forwarded unchanged to ``labelme2yolo_single``.
        labelme_label_dir: directory containing the ``*.json`` label files.
        save_dir: directory the ``.txt`` files are written to; created if missing.

    One ``<json stem>.txt`` is written per json file that yields at least one
    label row; json files without shapes produce no txt file.
    """
    labelme_label_dir = str(Path(labelme_label_dir)) + '/'
    yolo_label_dir = str(Path(save_dir)) + '/'
    os.makedirs(yolo_label_dir, exist_ok=True)

    # Sorted so the progress output is in a stable order.
    json_files = sorted(glob(labelme_label_dir + '*.json'))
    for ijf, jf in enumerate(json_files):
        print(ijf + 1, '/', len(json_files), jf)
        filename = os.path.basename(jf).rsplit('.', 1)[0]
        labels, image_path = labelme2yolo_single(img_path, jf)
        if len(labels) > 0:
            # Explicit per-column format: integer class id and fixed-point
            # coordinates. The np.savetxt default ('%.18e') writes every
            # column, class id included, in scientific notation.
            np.savetxt(yolo_label_dir + filename + '.txt', labels,
                       fmt=['%d', '%.6f', '%.6f', '%.6f', '%.6f'])
    print('Completed!')
    
if __name__ == '__main__':
    # Dataset locations; adjust these three paths to your own layout.
    img_path = 'D:/DeskTop/Datasets/clothes/images/'    # dataset images
    json_dir = 'D:/DeskTop/Datasets/clothes/label_json/'    # labelme json labels (input)
    save_dir = 'D:/DeskTop/Datasets/clothes/label_txt/'     # YOLO txt labels (output)
    labelme2yolo(img_path, labelme_label_dir=json_dir, save_dir=save_dir)

第六步：将YOLO格式转为xml格式

因为数据增广需要xml格式，所以再进行一次转换。注意把代码中的 labels 列表改为自己数据集的类别，顺序要与YOLO标签中的类别编号一致

代码如下：

# trans_YOLOtxt_to_VOCxml.py

import xml.dom.minidom
import glob
from PIL import Image
from math import ceil
import shutil
import os
 
yolo_file = 'D:/DeskTop/Datasets/clothes/label_txt2/'  # folder holding the YOLO-format txt label files
turn_xml_file = 'D:/DeskTop/Datasets/clothes/label_xml/'  # folder the generated VOC xml files are written to
img_file = 'D:/DeskTop/Datasets/clothes/images/'  # folder holding the jpg images

# Class names indexed by the YOLO class id; change this to your own classes.
# NOTE(review): the order must match the class ids used when the YOLO labels were produced.
labels = ['clothes']
src_img_dir = img_file
src_txt_dir = yolo_file
src_xml_dir = turn_xml_file


def _clamp(value, low, high):
    """Limit ``value`` to the closed interval [low, high]."""
    return max(low, min(high, value))


# Create the output folder up front so opening the xml files for writing cannot
# fail on a missing directory.
os.makedirs(src_xml_dir, exist_ok=True)

img_Lists = glob.glob(src_img_dir + '/*.jpg')
img_basenames = [os.path.basename(item) for item in img_Lists]  # file names without the directory
img_names = [os.path.splitext(item)[0] for item in img_basenames]  # file names without the extension

total_num = len(img_names)  # number of images to convert
count = 0  # progress counter
for img in img_names:  # img is the image name without extension, e.g. '000001'
    count += 1
    if count % 1000 == 0:
        print("当前转换进度{}/{}".format(count, total_num))
    with Image.open(os.path.join(src_img_dir, img + '.jpg')) as im:
        width, height = im.size

    # Read the YOLO label file. An image without one (the labelme->YOLO step
    # writes none for unlabelled images) is treated as having no objects.
    txt_path = os.path.join(src_txt_dir, img + '.txt')
    gt = []
    if os.path.isfile(txt_path):
        with open(txt_path) as txt_file:
            gt = [line for line in txt_file.read().splitlines() if line.strip()]

    # The with-block guarantees the xml file is flushed and closed.
    with open(os.path.join(src_xml_dir, img + '.xml'), 'w') as xml_file:
        # Header shared by labelled and unlabelled images.
        xml_file.write('<annotation>\n')
        xml_file.write('    <folder>VOC2007</folder>\n')
        xml_file.write('    <filename>' + str(img) + '.jpg' + '</filename>\n')
        xml_file.write('    <size>\n')
        xml_file.write('        <width>' + str(width) + '</width>\n')
        xml_file.write('        <height>' + str(height) + '</height>\n')
        xml_file.write('        <depth>3</depth>\n')
        xml_file.write('    </size>\n')

        # One <object> element per YOLO label row.
        for img_each_label in gt:
            # split() copes with any run of whitespace; use split(',') for comma-separated files.
            spt = img_each_label.split()
            xml_file.write('    <object>\n')
            # The class id may be written as a float (e.g. "0.000e+00"), hence int(float(...)).
            xml_file.write('        <name>' + str(labels[int(float(spt[0]))]) + '</name>\n')
            xml_file.write('        <pose>Unspecified</pose>\n')
            xml_file.write('        <truncated>0</truncated>\n')
            xml_file.write('        <difficult>0</difficult>\n')
            xml_file.write('        <bndbox>\n')

            # Convert the normalised centre/size to pixel corner coordinates.
            center_x = round(float(spt[1]) * width)
            center_y = round(float(spt[2]) * height)
            bbox_width = round(float(spt[3]) * width)
            bbox_height = round(float(spt[4]) * height)
            # Clamp to the image: rounding can push a box at the border outside it.
            xmin = str(_clamp(int(center_x - bbox_width / 2), 0, width))
            ymin = str(_clamp(int(center_y - bbox_height / 2), 0, height))
            xmax = str(_clamp(int(center_x + bbox_width / 2), 0, width))
            ymax = str(_clamp(int(center_y + bbox_height / 2), 0, height))

            xml_file.write('            <xmin>' + xmin + '</xmin>\n')
            xml_file.write('            <ymin>' + ymin + '</ymin>\n')
            xml_file.write('            <xmax>' + xmax + '</xmax>\n')
            xml_file.write('            <ymax>' + ymax + '</ymax>\n')
            xml_file.write('        </bndbox>\n')
            xml_file.write('    </object>\n')

        xml_file.write('</annotation>')

第七步：可视化

验证标签转换后是否正确：用xml标签进行可视化，多测试几张图片，最好找一些目标多的图片来验证标签的正确性

代码如下：

# visualization_xml_OD.py

from lxml import etree
import cv2 as cv
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np


def parse_xml_to_dict(xml):
    """
    Recursively turn an XML element into nested dictionaries
    (modelled on tensorflow's recursive_parse_xml_to_dict).

    Args:
        xml: an element of a parsed XML tree (e.g. from lxml.etree)

    Returns:
        A one-entry dict ``{tag: value}``. For an element without children
        the value is its text. Otherwise it is a dict of the children, in
        which every 'object' child is collected into a list.
    """
    children = list(xml)
    if not children:  # leaf element: nothing to recurse into
        return {xml.tag: xml.text}

    parsed = {}
    for node in children:
        value = parse_xml_to_dict(node)[node.tag]  # recurse into the child
        if node.tag == 'object':
            # An annotation can hold several objects, so they go into a list.
            parsed.setdefault(node.tag, []).append(value)
        else:
            parsed[node.tag] = value
    return {xml.tag: parsed}


def get_xml_info(xml_path):
    """Read a VOC xml annotation file and return its bounding boxes.

    Args:
        xml_path: path of the xml file.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
        ``<object>``; an empty list when the annotation has no objects.
    """
    # Read bytes rather than text: lxml rejects str input that carries an XML
    # encoding declaration, while bytes are always accepted.
    with open(xml_path, 'rb') as fid:
        xml_bytes = fid.read()
    xml = etree.fromstring(xml_bytes)
    data = parse_xml_to_dict(xml)["annotation"]
    if not isinstance(data, dict):
        # <annotation> without child elements parses to its text, not a dict.
        return []
    bboxes = []
    # Annotations of unlabelled images have no <object> element at all.
    for obj in data.get("object", []):
        bndbox = obj["bndbox"]
        # int(float(...)) also accepts coordinates written as e.g. "12.0".
        bbox = [int(float(bndbox[key])) for key in ("xmin", "ymin", "xmax", "ymax")]
        bboxes.append(bbox)
    return bboxes


img_path = "D:/DeskTop/Datasets/clothes/images/000056.jpg"    # image to visualise
xml_path = "D:/DeskTop/Datasets/clothes/label_xml/000056.xml"     # VOC label file belonging to that image
img = cv.imread(img_path)
if img is None:
    # cv.imread returns None for a missing or unreadable file; fail with a clear message here.
    raise FileNotFoundError("cannot read image: " + img_path)

bboxes = np.array(get_xml_info(xml_path))
for box in bboxes:
    # Plain Python ints: some OpenCV builds reject numpy integer types as point coordinates.
    pt1 = (int(box[0]), int(box[1]))
    pt2 = (int(box[2]), int(box[3]))
    cv.rectangle(img, pt1, pt2, (0, 0, 255), 4)  # red in BGR, 4 px thick


plt.figure(1)
plt.imshow(img[:, :, ::-1], cmap='gray')  # reverse the channel axis: BGR (OpenCV) -> RGB (matplotlib)
plt.show()

最终结果

在这里插入图片描述
最终的处理结果包含四个文件夹，数据集图片以及三种类型的标签

至此，从labelme格式转为YOLO和VOC格式的任务就完成了。

下面是将txt标签中的科学计数法表示转为float的代码，有需要的或是强迫症患者可以参考一下。
代码如下：
先将txt中的'+'替换为'-'：

# change_txt_'+'_to_'-'.py

import os
def trans(input_dir, output_dir, word, splitword):
    """Replace every occurrence of ``word`` with ``splitword`` in all .txt files.

    Args:
        input_dir: directory that is walked recursively for ``.txt`` files.
        output_dir: directory the rewritten files are stored in, using the
            same relative layout as under ``input_dir``. May equal
            ``input_dir`` to rewrite the files in place.
        word: text to search for.
        splitword: replacement text.
    """
    for root, _dirs, files in os.walk(input_dir):
        # Mirror the sub-directory under output_dir. The file really lives in
        # `root`, which differs from input_dir for nested directories.
        target_root = os.path.normpath(
            os.path.join(output_dir, os.path.relpath(root, input_dir)))
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            # Read completely and close before writing, since source and
            # target may be the same file.
            with open(os.path.join(root, item), "r", encoding='UTF-8') as src:
                content = src.read()
            os.makedirs(target_root, exist_ok=True)
            with open(os.path.join(target_root, item), 'w', encoding='UTF-8') as fval:
                fval.write(content.replace(word, splitword))
                
                
if __name__ == '__main__':
    # Raw strings: "\D", "\c" and "\l" are invalid escape sequences in a normal
    # string literal and trigger a warning. The resulting paths are unchanged.
    # Source folder
    input_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Target folder (same as the source, so the files are rewritten in place)
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Text to search for
    word = '+'
    # Text it is replaced with
    splitword = "-"
    trans(input_dir, output_dir, word, splitword)

再将科学计数法转为float

#!/usr/bin/env python
# -*- coding: utf-8 -*-
 
 
import re
import math
import os
 
 
def ConvertELogStrToValue(eLogStr):
    """
    Convert the first number in scientific ("e") notation found in a string
    to a float. The exponent is a power of ten, not of Euler's number.

    Args:
        eLogStr: text to search; the match is case-insensitive and may sit
            anywhere in the string.

    Returns:
        (convertOK, convertedValue): ``(True, value)`` when a number of the
        form ``<digits>.<digits>e<exponent>`` was found, the exponent sign
        being optional; ``(False, 0.0)`` otherwise.

    eg:
    input:  -1.1694737e-03
    output: (True, -0.0011694737)
    input:  8.9455025e-04
    output: (True, 0.00089455025)
    input:  1.5e+02
    output: (True, 150.0)
    """
    # Raw string so that \d is a regex class rather than an invalid string escape.
    # [-+]? accepts positive exponents such as e+00.
    foundEPower = re.search(
        r"(?P<coefficientPart>-?\d+\.\d+)e(?P<ePowerPart>[-+]?\d+)", eLogStr, re.I)
    if not foundEPower:
        return (False, 0.0)
    # float() parses the matched text as one number, which avoids the rounding
    # error of multiplying the coefficient by a separately computed power.
    return (True, float(foundEPower.group(0)))
 
def parseIntEValue(intEValuesStr):
    """Rewrite every scientific-notation number of one text line as plain decimal.

    Each number of the form ``<digits>.<digits>e<exponent>`` found in
    ``intEValuesStr`` is printed together with its float value. It is then
    replaced by its fixed-point spelling through ``trans``, which operates on
    the .txt files below the module-level ``txt_path``.

    Args:
        intEValuesStr: one line of a label file.
    """
    from decimal import Decimal  # local import: exact fixed-point rendering of the matched text

    # Raw string so \d is not an invalid string escape; [-+]? also catches
    # positive exponents such as e+00.
    intEStrList = re.findall(r"-?\d+\.\d+e[-+]?\d+", intEValuesStr)
    for eachIntEStr in intEStrList:
        (convertOK, convertedValue) = ConvertELogStrToValue(eachIntEStr)
        if not convertOK:
            # The converter did not recognise the token; parse it directly.
            convertedValue = float(eachIntEStr)
        print("eachIntEStr=%s,\tconvertedValue=%f" % (eachIntEStr, convertedValue))
        # str(float) falls back to scientific notation below 1e-4, which would
        # make the replacement a no-op. Decimal's 'f' format never does.
        # normalize() drops trailing zeros ("5.000e-01" -> "0.5").
        fixedPointStr = format(Decimal(eachIntEStr).normalize(), 'f')
        trans(txt_path, txt_path, eachIntEStr, fixedPointStr)

def trans(input_dir, output_dir, word, splitword):
    """Replace every occurrence of ``word`` with ``splitword`` in all .txt files.

    Args:
        input_dir: directory that is walked recursively for ``.txt`` files.
        output_dir: directory the rewritten files are stored in, using the
            same relative layout as under ``input_dir``. May equal
            ``input_dir`` to rewrite the files in place.
        word: value to search for; converted with ``str()``.
        splitword: replacement value; converted with ``str()``.
    """
    old_text, new_text = str(word), str(splitword)
    for root, _dirs, files in os.walk(input_dir):
        # Mirror the sub-directory under output_dir. The file really lives in
        # `root`, which differs from input_dir for nested directories.
        target_root = os.path.normpath(
            os.path.join(output_dir, os.path.relpath(root, input_dir)))
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            # Read completely and close before writing, since source and
            # target may be the same file.
            with open(os.path.join(root, item), "r", encoding='UTF-8') as src:
                content = src.read()
            os.makedirs(target_root, exist_ok=True)
            with open(os.path.join(target_root, item), 'w', encoding='UTF-8') as fval:
                fval.write(content.replace(old_text, new_text))
   
 
# intEValuesStr= 2.1690427e-005 -1.1694737e-003 -6.1193734e-004
# 8.9455025e-004 -8.6277081e-004 -7.2735757e-004
# intEStrList= ['2.1690427e-005', '-1.1694737e-003', '-6.1193734e-004', '8.9455025e-004', '-8.6277081e-004', '-7.2735757e-004']
# eachIntEStr=2.1690427e-005,     convertedValue=0.014615
# eachIntEStr=-1.1694737e-003,    convertedValue=-0.058225
# eachIntEStr=-6.1193734e-004,    convertedValue=-0.112080
# eachIntEStr=8.9455025e-004,     convertedValue=0.163843
# eachIntEStr=-8.6277081e-004,    convertedValue=-0.158022
# eachIntEStr=-7.2735757e-004,    convertedValue=-0.133220
 
if __name__ == "__main__":
    # Raw strings: "\D", "\c" and "\l" are invalid escape sequences in a normal
    # string literal and trigger a warning. The resulting paths are unchanged.
    txt_path = r"D:\DeskTop\Datasets\clothes\label_txt/"
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    for root, dirs, files in os.walk(txt_path):
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue
            # Read all lines and close the file first: parseIntEValue rewrites
            # the label files, including the one just read.
            with open(os.path.join(root, item), 'r', encoding='UTF-8') as f:
                lines = f.readlines()
            for line in lines:
                parseIntEValue(line.strip())