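"""
Convert Pascal VOC annotations (images + XML files whose boxes are stored as
un-normalized top-left / bottom-right pixel corners)
to YOLO format: images + one txt per image (each line: class, then normalized cx, cy, w, h).
"""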
import glob
import os
from xml.dom.minidom import parse
# Survey the annotation set to find out which class names occur.
# Disabled by default: switch the guard to 1 to re-run the survey.
if 0:
    classes = []
    annotation_paths = glob.glob(r"D:\dataset\NEU-DET\train\ANNOTATIONS\*.xml")
    for annotation_path in annotation_paths:
        root = parse(annotation_path).documentElement
        for object_node in root.getElementsByTagName("object"):
            # The text of the first <name> child is the class label.
            label = object_node.getElementsByTagName("name")[0].childNodes[0].data
            if label in classes:
                continue
            # Keep first-seen order so the printed list can be pasted as-is.
            classes.append(label)
    print(classes)  # ['crazing', 'patches', 'inclusion', 'pitted_surface', 'rolled-in_scale', 'scratches']
def _tag_text(node, tag):
    """Return the stripped text of the first ``tag`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].childNodes[0].data.strip()


def voc_xml_to_yolo(xml_file, classes):
    """Convert one Pascal VOC annotation into YOLO label text.

    Args:
        xml_file: A path or an open file object, passed to
            ``xml.dom.minidom.parse``.
        classes: Ordered list of class names; a name's position in the list
            is written as its class id.

    Returns:
        A string with one newline-terminated line per ``<object>`` element,
        formatted as ``class cx cy w h``, where the box centre and size are
        divided by the image width/height read from ``<size>``. The string
        is empty when the annotation contains no objects.

    Raises:
        ValueError: If an object's name is not present in ``classes``.
    """
    root = parse(xml_file).documentElement

    # Image width/height, used to normalize the box coordinates.
    size = root.getElementsByTagName("size")[0]
    img_w = float(_tag_text(size, "width"))
    img_h = float(_tag_text(size, "height"))

    lines = []
    for item in root.getElementsByTagName("object"):
        # Map the class name to its index in ``classes``.
        class_id = classes.index(_tag_text(item, "name"))

        # Corner coordinates are parsed as floats so that annotations storing
        # values such as "20.0" or "20.5" do not crash the conversion.
        bbox = item.getElementsByTagName("bndbox")[0]
        xmin, ymin, xmax, ymax = (
            float(_tag_text(bbox, tag)) for tag in ("xmin", "ymin", "xmax", "ymax")
        )

        # Corner form -> centre/size form, then normalize.
        w = xmax - xmin
        h = ymax - ymin
        x_center = (xmin + w / 2.0) / img_w
        y_center = (ymin + h / 2.0) / img_h
        w /= img_w
        h /= img_h

        fields = (class_id, x_center, y_center, w, h)
        lines.append(" ".join(map(str, fields)) + "\n")
    return "".join(lines)


# Convert every annotation of the training set (set the guard to 0 to skip).
if 1:
    classes = ['crazing', 'patches', 'inclusion', 'pitted_surface', 'rolled-in_scale', 'scratches']
    label_dir = "D:\\dataset\\NEU-DET\\train\\labels"
    xml_files = glob.glob(r"D:\dataset\NEU-DET\train\ANNOTATIONS\*.xml")
    if xml_files:
        # Make sure the output directory exists before the first write.
        os.makedirs(label_dir, exist_ok=True)
    i = 0
    for i, xml_file in enumerate(xml_files, start=1):
        msg = voc_xml_to_yolo(xml_file, classes)
        # save: same base name as the annotation, with a .txt extension
        txt_name = os.path.splitext(os.path.basename(xml_file))[0] + ".txt"
        with open(os.path.join(label_dir, txt_name), "w") as f:
            f.write(msg)
        # Progress: number of files converted so far.
        print(i)