近段时间在 py-faster-rcnn 下训练fan缺陷检测的任务。原先使用的标注文件(VOC格式)中有一些标注不当之处,因此使用labelImg这个标注工具进行了修改。但修改后的文件与我自己原先的标注文件格式不一致,而且缺失一部分原始的标注信息,为此写了一个把labelImg标注文件和原始标注文件整合成新标注文件的程序,其中使用了xml.etree.ElementTree包。

原始标注文件的结构:

<annotation>
	<folder>FanDamage</folder>
	<filename>000002.jpg</filename>
	<source>
		<database>The FanDamage Database</database>
		<annotation>FanDamage</annotation>
		<image>SanLa</image>
		<flickrid>000002</flickrid>
	</source>
	<owner>
		<flickrid>Jack</flickrid>
		<name>TingGo</name>
	</owner>
	<size>
		<width>640</width>
		<height>480</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>break</name>
		<color>red</color>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>255</xmin>
			<ymin>184</ymin>
			<xmax>266</xmax>
			<ymax>206</ymax>
		</bndbox>
	</object>
	<fan>
		<name>fan</name>
		<color>red</color>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>192</xmin>
			<ymin>1</ymin>
			<xmax>327</xmax>
			<ymax>338</ymax>
		</bndbox>
	</fan>
</annotation>

labelImg修改后的标注文件的结构:

<annotation>
	<folder>PreJPEGImages</folder>
	<filename>000001.jpg</filename>
	<path>C:\Users\Administrator\Desktop\windows_v1.6.0\ImageSet\PreJPEGImages\000001.jpg</path>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>640</width>
		<height>480</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>break</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>127</xmin>
			<ymin>185</ymin>
			<xmax>145</xmax>
			<ymax>193</ymax>
		</bndbox>
	</object>
	<object>
		<name>pollute</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>33</xmin>
			<ymin>138</ymin>
			<xmax>98</xmax>
			<ymax>155</ymax>
		</bndbox>
	</object>
	<object>
		<name>pollute</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>220</xmin>
			<ymin>181</ymin>
			<xmax>229</xmax>
			<ymax>188</ymax>
		</bndbox>
	</object>
	<object>
		<name>blister</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>104</xmin>
			<ymin>146</ymin>
			<xmax>199</xmax>
			<ymax>183</ymax>
		</bndbox>
	</object>
	<object>
		<name>blister</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>241</xmin>
			<ymin>170</ymin>
			<xmax>411</xmax>
			<ymax>233</ymax>
		</bndbox>
	</object>
	<object>
		<name>blister</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>424</xmin>
			<ymin>192</ymin>
			<xmax>593</xmax>
			<ymax>267</ymax>
		</bndbox>
	</object>
</annotation>

转化主程序。由于程序中已有比较详尽的注释,在此不多述;程序主要涉及xml文件的创建,具体如下:

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 2018/08/15 by DQ

import os 
from xml.etree.ElementTree import Element, SubElement, ElementTree
from GetAnnotInfo import GetDamageAnnotBoxLoc,GetFanAnnotBoxLoc,GetPreImSourceObjectColor,GetImFolder

# File extensions of the images and of the annotation files.
ImExpName='.jpg'
AnotExpName='.xml'
# <folder> text of the original annotations; a file whose <folder> still equals
# this value is treated as not modified and is skipped by the converters.
PreImAnotFolder='FanDamage'

# Directories used for the temporary test run.
PreAnotFolder='/home/Ok/TempTest/PreImAnot'# original fan/defect annotation files (VOC format)
FixAnotFolder='/home/Ok/TempTest/FixImAnot2'# annotation files edited with the labelImg tool (VOC format)
# NOTE(review): this is the same directory as FixAnotFolder, so converted files
# overwrite the labelImg files in place -- confirm this is intended.
AnotWriteFolder='/home/Ok/TempTest/FixImAnot2' # where the converted annotations (custom format) are written

#写目标框到标记文件的程序,因为目标框可能有好几个所以单独写个函数好些
def WriteObject(Object,ObjectName,Bndbox,ObjectColor):
    """Fill an annotation node with the description of one bounding box.

    Appends ``name``, ``color``, ``truncated``, ``difficult`` and ``bndbox``
    children to *Object*, in that order. ``truncated`` and ``difficult`` are
    always written as ``'0'``. *Bndbox* is read by index as
    ``[xmin, ymin, xmax, ymax]`` and each coordinate is stored as a string.
    Returns nothing; *Object* is modified in place.
    """
    HeaderFields=(
        ('name',ObjectName),
        ('color',ObjectColor),
        ('truncated','0'),
        ('difficult','0'),
    )
    for Tag,Text in HeaderFields:
        SubElement(Object,Tag).text=Text

    # Coordinates live one level deeper, under <bndbox>.
    BoxNode=SubElement(Object,'bndbox')
    for Index,Tag in enumerate(('xmin','ymin','xmax','ymax')):
        SubElement(BoxNode,Tag).text=str(Bndbox[Index])
	

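# Convert an annotation file edited with the labelImg tool (VOC format) into my own training format.
# Not every original annotation needed fixing, and the fixes mainly concern the defect boxes on the fan,
# so the fan box itself is not re-annotated in labelImg; the labelImg output also carries no fan colour,
# which is why the colour information has to be taken from the original annotation file.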
def VocXml2MyXml(AnotName,ImSource,ObjectColor,FanPart,ObjectSet):
    """Build one merged annotation file and write it to AnotWriteFolder.

    Args:
        AnotName: annotation file name (e.g. ``000001.xml``); the part before
            the extension is used as the image id.
        ImSource: text written to ``source/image``.
        ObjectColor: text written to the ``color`` child of every box.
        FanPart: dict whose ``'fan'`` key holds a list of fan boxes; may be
            empty when the original file has no fan box.
        ObjectSet: dict mapping a defect name to a list of boxes; may be empty.

    Each box is ``[xmin, ymin, xmax, ymax]``. The image size is always written
    as 640x480x3 and the owner fields are fixed strings.
    """
    # splitext keeps ids that contain dots intact, unlike split('.')[0].
    ImId=os.path.splitext(AnotName)[0]
    ImName=ImId+ImExpName

    Root=Element('annotation')
    SubElement(Root,'folder').text='FanDamage'
    SubElement(Root,'filename').text=ImName

    # <source>: where the image comes from.
    source=SubElement(Root,'source')
    SubElement(source,'database').text='The FanDamage Database'
    SubElement(source,'annotation').text='FanDamage'
    SubElement(source,'image').text=ImSource
    SubElement(source,'flickrid').text=ImId

    owner=SubElement(Root,'owner')
    SubElement(owner,'flickrid').text='Pine'
    SubElement(owner,'name').text='DQ'

    # <size>: hard-coded image dimensions.
    size=SubElement(Root,'size')
    SubElement(size,'width').text='640'
    SubElement(size,'height').text='480'
    SubElement(size,'depth').text='3'

    SubElement(Root,'segmented').text='0'

    if FanPart:  # some original files have no fan box at all
        FanName='fan'
        for Bndbox in FanPart[FanName]:
            fan=SubElement(Root,FanName)
            WriteObject(fan,FanName,Bndbox,ObjectColor)
    if ObjectSet:  # the edited file may contain no defect boxes
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for ObjectName,BndboxSet in ObjectSet.items():
            for Bndbox in BndboxSet:
                Object=SubElement(Root,'object')
                WriteObject(Object,ObjectName,Bndbox,ObjectColor)

    Tree=ElementTree(Root)
    AnotPath=os.path.join(AnotWriteFolder,AnotName)
    Tree.write(AnotPath)
	

#临时测试用
def TempTestAnotFile():
    """Convert annotation files 000001 to 000015 as a quick trial run.

    For each id, the labelImg-edited file in FixAnotFolder is merged with the
    original file of the same name in PreAnotFolder and written through
    VocXml2MyXml. Files are skipped when they were not edited or when the
    original file yields no image source or no colour.
    """
    for i in range(1,16):
        AnotName=str(i).zfill(6)+AnotExpName
        FixAnotPath=os.path.join(FixAnotFolder,AnotName)  # edited annotation
        # labelImg was run on a differently named image folder, so an
        # unchanged <folder> value means the file was never edited.
        if GetImFolder(FixAnotPath)==PreImAnotFolder:
            continue
        PreAnotPath=os.path.join(PreAnotFolder,AnotName)  # original annotation
        ImSource,ObjectColor=GetPreImSourceObjectColor(PreAnotPath)
        # Same guard as GenerateRealAnotFile: without both values the output
        # would contain empty <image>/<color> elements.
        if ImSource is None or ObjectColor is None:
            continue
        FanPart=GetFanAnnotBoxLoc(PreAnotPath)  # fan boxes from the original
        ObjectSet=GetDamageAnnotBoxLoc(FixAnotPath)  # defect boxes from the edit
        VocXml2MyXml(AnotName,ImSource,ObjectColor,FanPart,ObjectSet)

#转化全部的修改文件,同上TempTestAnotFile()
def GenerateRealAnotFile():
    """Convert every edited annotation file with id 000001 to 001980.

    For each id, the labelImg-edited file in FixAnotFolder is merged with the
    original file of the same name in PreAnotFolder and written through
    VocXml2MyXml. Files are skipped when they were not edited or when the
    original file yields no image source or no colour. The current id is
    printed as a progress indicator.
    """
    # Ids are enumerated numerically rather than via os.listdir(FixAnotFolder).
    for i in range(1,1981):
        print(i)  # call form is valid on both Python 2 and Python 3
        AnotName=str(i).zfill(6)+AnotExpName
        FixAnotPath=os.path.join(FixAnotFolder,AnotName)
        # An unchanged <folder> value means labelImg never rewrote this file.
        if GetImFolder(FixAnotPath)==PreImAnotFolder:
            continue
        PreAnotPath=os.path.join(PreAnotFolder,AnotName)
        ImSource,ObjectColor=GetPreImSourceObjectColor(PreAnotPath)
        if ImSource is None or ObjectColor is None:
            continue
        FanPart=GetFanAnnotBoxLoc(PreAnotPath)
        ObjectSet=GetDamageAnnotBoxLoc(FixAnotPath)
        VocXml2MyXml(AnotName,ImSource,ObjectColor,FanPart,ObjectSet)
	

# Script entry: the 15-file trial run is kept disabled; the full conversion runs on execution.
#TempTestAnotFile()
GenerateRealAnotFile()

子程序。由于程序简明扼要,不做注释;这个程序主要包含xml文件的读取过程,与我上一篇博客中的内容相同,具体如下:

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 2018/08/15 by DQ
# get annotation object bndbox location
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
										         
##get object annotation bndbox loc start 
def GetDamageAnnotBoxLoc(AnotPath):
    """Collect the defect bounding boxes of one annotation file.

    Parses the XML at *AnotPath* and reads every ``object`` element that is a
    direct child of the root.

    Returns:
        dict mapping each object's ``name`` text to a list of
        ``[xmin, ymin, xmax, ymax]`` integer boxes, in document order.
        Empty when the file has no ``object`` element.
    """
    tree = ET.ElementTree(file=AnotPath)  # open xml
    root = tree.getroot()
    ObjBndBoxSet={}
    for Object in root.findall('object'):
        ObjName=Object.find('name').text
        BndBox=Object.find('bndbox')
        BndBoxLoc=[int(BndBox.find(Tag).text)
                   for Tag in ('xmin','ymin','xmax','ymax')]
        # setdefault replaces dict.has_key(), which does not exist in Python 3.
        ObjBndBoxSet.setdefault(ObjName,[]).append(BndBoxLoc)
    return ObjBndBoxSet
##get object annotation bndbox loc end


##get fan annotation bndbox loc start 
def GetFanAnnotBoxLoc(AnotPath,FanTags=('fan','blade')):
    """Collect the fan bounding boxes of one annotation file.

    Parses the XML at *AnotPath* and reads every direct child of the root
    whose tag is listed in *FanTags*. The default accepts ``fan``, the tag
    this project writes into its annotations, as well as the ``blade`` tag
    that was the only one searched for previously.

    Args:
        AnotPath: annotation file to parse.
        FanTags: element tags that hold a fan box.

    Returns:
        dict mapping each element's ``name`` text to a list of
        ``[xmin, ymin, xmax, ymax]`` integer boxes, in document order.
        Empty when no matching element exists.
    """
    tree = ET.ElementTree(file=AnotPath)  # open xml
    root = tree.getroot()
    ObjBndBoxSet={}
    for Object in root:
        if Object.tag not in FanTags:
            continue
        ObjName=Object.find('name').text
        BndBox=Object.find('bndbox')
        BndBoxLoc=[int(BndBox.find(Tag).text)
                   for Tag in ('xmin','ymin','xmax','ymax')]
        # setdefault replaces dict.has_key(), which does not exist in Python 3.
        ObjBndBoxSet.setdefault(ObjName,[]).append(BndBoxLoc)
    return ObjBndBoxSet
##get fan annotation bndbox loc end


def GetPreImSourceObjectColor(AnotPath):
    """Read the image source and the first object colour of an annotation.

    Parses the XML at *AnotPath*.

    Returns:
        ``(ImSource, ObjectColor)``. *ImSource* is the text of
        ``source/image``. *ObjectColor* is the ``color`` text of the first
        ``object`` element that has a non-empty one. Either value is ``None``
        when the corresponding element is missing or empty.
    """
    tree = ET.ElementTree(file=AnotPath)  # open xml
    root = tree.getroot()

    # A missing <source> or <image> yields None instead of an AttributeError.
    source=root.find('source')
    ImSource=None
    if source is not None:
        ImSource=source.findtext('image') or None

    ObjectColor=None
    for Object in root.findall('object'):
        # findtext tolerates objects without a <color> child, such as those
        # in labelImg-style annotations.
        ObjectColor=Object.findtext('color') or None
        if ObjectColor:
            break
    return ImSource,ObjectColor


def GetImFolder(AnotPath):
    """Return the text of the root's ``folder`` child in the annotation at *AnotPath*."""
    AnnotationRoot = ET.parse(AnotPath).getroot()
    FolderNode = AnnotationRoot.find('folder')
    return FolderNode.text

至此就完成了转化labelImg标注文件到自己的标注文件的过程

GitHub 加速计划 / la / labelImg
22.31 K
6.24 K
下载
🎉 超级实用!LabelImg,图像标注神器,现在加入Label Studio社区,享受多模态数据标注新体验!🚀 简单易用,支持XML、YOLO和CreateML格式,适用于ImageNet等项目。不再单独维护,立即尝试Label Studio,安装一键到位,更灵活,功能更强大!👇 安装即刻开始:pip3 install labelImg,或访问<https://github.com/heartexlabs/label-studio> 获取源码构建。一起探索数据标注的新边界!👨‍💻👩‍💻【此简介由AI生成】
最近提交(Master分支:1 个月前 )
b33f965b Adds information about Label Studio community to welcome LabelImg users 2 年前
2d5537ba 2 年前
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐