数据
我们已有的标注数据是以json格式保存的,而现在的训练代码使用的标注格式为PASCAL VOC2007。为了不修改训练代码,需要将数据转换为PASCAL VOC2007的xml格式。
转换代码
import os
import json
from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom import minidom
# Collect the names of the JSON files found directly inside a folder
def ReadFileDir(path):
    """Return the names of the ``.json`` entries directly inside *path*.

    Only bare entry names are returned (they are not joined with *path*) and
    sub-directories are not searched.  The extension match is case-sensitive.

    The result is sorted so that repeated runs handle the files in the same
    order; ``os.listdir`` itself returns entries in arbitrary order.

    Raises whatever ``os.listdir`` raises when *path* cannot be listed.
    """
    return sorted(
        name
        for name in os.listdir(path)
        if os.path.splitext(name)[1] == ".json"
    )
# Load one JSON file
def ReadJson(path):
    """Parse the JSON document stored at *path* and return the decoded object.

    The file is opened in binary mode on purpose: ``json.load`` then detects
    the encoding itself (UTF-8, UTF-16 or UTF-32, with or without a BOM)
    instead of depending on the platform's default text encoding.

    Raises ``OSError`` if the file cannot be opened and ``ValueError``
    (``json.JSONDecodeError``) if its content is not valid JSON.
    """
    with open(path, 'rb') as load_f:
        return json.load(load_f)
# Create one child element in the XML tree
def subElement(root, tag, text):
    """Append a child named *tag* to *root* and return the new element.

    root: parent element the child is attached to.
    tag:  name of the new child element.
    text: text content for the child.  An empty string or ``None`` leaves
          the element without text, which is how container nodes are
          created.  Values that are neither ``str`` nor ``bytes`` (for
          example numbers read from JSON) are converted with ``str()`` so
          they can be stored as element text.
    """
    ele = SubElement(root, tag)
    if text is None or text == "":
        return ele
    if not isinstance(text, (str, bytes)):
        text = str(text)
    ele.text = text
    return ele
# Save the tree as a formatted (pretty-printed) XML file
def saveXML(root, filename, indent="\t", newl="\n", encoding="utf-8"):
    """Write the element tree under *root* to *filename* as indented XML.

    root:     root element of the tree to serialize.
    filename: path of the file to create or overwrite.
    indent:   string used for each nesting level.
    newl:     string written at the end of each line.
    encoding: encoding named in the XML declaration and used for the file.

    The tree is serialized, re-parsed with ``minidom`` and written back out
    through ``writexml`` to obtain the indentation.
    """
    rawText = tostring(root)
    dom = minidom.parseString(rawText)
    # The file must be written in the same encoding that writexml() puts in
    # the XML declaration; otherwise non-ASCII text yields a document whose
    # bytes contradict its header.  Characters the target encoding cannot
    # represent are emitted as numeric character references.
    with open(filename, 'w', encoding=encoding,
              errors="xmlcharrefreplace") as f:
        dom.writexml(f, "", indent, newl, encoding)
# Build the XML header holding the per-image base information
def make_xml(image_name, width, height, depth=3):
    """Create the ``annotation`` root element with its fixed header nodes.

    image_name: value stored in the ``filename`` node.
    width:      image width, stored via ``str()`` in ``size/width``.
    height:     image height, stored via ``str()`` in ``size/height``.
    depth:      value stored via ``str()`` in ``size/depth``; defaults to 3.

    Returns the root element.  It contains, in order, the ``folder``,
    ``filename``, ``source``, ``owner``, ``segmented`` and ``size`` nodes;
    ``object`` nodes are expected to be appended by the caller.
    """
    node_root = Element('annotation')
    subElement(node_root, "folder", "widerface")
    subElement(node_root, "filename", image_name)

    # An empty text argument creates a container node for the children below.
    node_source = subElement(node_root, "source", "")
    subElement(node_source, "database", "wider face Database")
    subElement(node_source, "annotation", "PASCAL VOC2007")
    subElement(node_source, "image", "flickr")
    subElement(node_source, "flickrid", "-1")

    node_owner = subElement(node_root, "owner", "")
    subElement(node_owner, "flickrid", "yanyu")
    subElement(node_owner, "name", "yanyu")

    subElement(node_root, "segmented", "0")

    node_size = subElement(node_root, "size", "")
    subElement(node_size, "width", str(width))
    subElement(node_size, "height", str(height))
    subElement(node_size, "depth", str(depth))
    return node_root
def _crop_to_bndbox(crop, width, height):
    """Return ``(xmin, ymin, xmax, ymax)`` in pixels for one crop record.

    The record's ``x``, ``y``, ``width`` and ``height`` values are scaled by
    the image size, i.e. they are treated as fractions of the image
    dimensions.  Results are truncated to integers.
    """
    xmin = int(crop['x'] * width)
    ymin = int(crop['y'] * height)
    xmax = int((crop['x'] + crop['width']) * width)
    # The bottom edge starts from the box's y origin, not its x origin.
    ymax = int((crop['y'] + crop['height']) * height)
    return xmin, ymin, xmax, ymax


if __name__ == "__main__":
    path = '/home/q/train/Data/images/hxlx'
    save_xml_dir = 'xml'
    # open() does not create missing directories, so make sure the output
    # folder exists before any file is written.
    os.makedirs(save_xml_dir, exist_ok=True)

    jsonfile = ReadFileDir(path)
    for json_name in jsonfile:
        record = ReadJson(os.path.join(path, json_name))

        # Records lacking any header field cannot be converted; skip them.
        if not all(key in record for key in ('photo_id', 'width', 'height')):
            continue
        image_id = record['photo_id']
        image_name = image_id + ".jpg"
        width = record['width']
        height = record['height']

        node_root = make_xml(image_name, width, height)

        # A missing (or null) 'croppers' entry means no object nodes.
        for crop_dict in record.get('croppers') or []:
            print(crop_dict)
            xmin, ymin, xmax, ymax = _crop_to_bndbox(crop_dict, width, height)

            node_object = subElement(node_root, "object", "")
            subElement(node_object, "name", crop_dict["cropper_type"])
            subElement(node_object, "pose", 'Unspecified')
            subElement(node_object, "truncated", '1')
            subElement(node_object, "difficult", '0')
            node_bndbox = subElement(node_object, "bndbox", "")
            subElement(node_bndbox, "xmin", str(xmin))
            subElement(node_bndbox, "ymin", str(ymin))
            subElement(node_bndbox, "xmax", str(xmax))
            subElement(node_bndbox, "ymax", str(ymax))
            subElement(node_object, "has_lm", '0')

        # Save one XML file per image, named after the photo id
        saveXML(node_root, os.path.join(save_xml_dir, image_id + ".xml"))
具体使用时,请根据自己的数据格式添加或修改相应的字段。