目标检测标签格式转换（水平框）-CFANZ编程社区

文章目录

COCO转YOLO
YOLO转VOC

COCO转YOLO

import json
import os
import glob
import cv2


def mkdir(path):
    if not os.path.exists(path):
        os.mkdir(path)

def convert(size, box):
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return x, y, w, h

def decode_json(json_path, txt_name):
    txt_file = open(txt_name, 'w')
    data = json.load(open(json_path, 'r', encoding='utf-8'))

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    for i in data['shapes']:

        if (i['shape_type'] == 'rectangle' and i['label'] == 'js'):
            x1 = int(i['points'][0][0])
            y1 = int(i['points'][0][1])
            x2 = int(i['points'][1][0])
            y2 = int(i['points'][1][1])

            bb = (x1, x2, y1, y2)
            bbox = convert((img_w, img_h), bb)
            txt_file.write( '0' + " " + " ".join([str(a) for a in bbox]) + '\n')


if __name__ == '__main__':
    mkdir('labels')
    jsonList = glob.glob('images/*.json')
    for jsonPath in jsonList:
        baseName = os.path.basename(jsonPath).split('.')[0]
        txtName = f'labels/{baseName}.txt'
        decode_json(jsonPath, txtName)

YOLO转VOC

import os
import xml.etree.ElementTree as ET
from PIL import Image
import numpy as np


def mkdir(path):
    if not os.path.exists(path):
        os.mkdir(path)


img_path = './JPEGImages/'                   #原图.jpg文件的路径
labels_path = './labels/'                    #labels中.txt文件的路径
annotations_path = './Annotations/'          #生成的xml文件需要保存的路径
### 标签的类别
classes = ['Aluminium foil', 'Battery', 'Aluminium blister pack', 'Carded blister pack', 'Other plastic bottle', 'Clear plastic bottle', 'Glass bottle', 'Plastic bottle cap', 'Metal bottle cap', 'Broken glass', 'Food Can', 'Aerosol', 'Drink can', 'Toilet tube', 'Other carton', 'Egg carton', 'Drink carton', 'Corrugated carton', 'Meal carton', 'Pizza box', 'Paper cup', 'Disposable plastic cup', 'Foam cup', 'Glass cup', 'Other plastic cup', 'Food waste', 'Glass jar', 'Plastic lid', 'Metal lid', 'Other plastic', 'Magazine paper', 'Tissues', 'Wrapping paper', 'Normal paper', 'Paper bag', 'Plastified paper bag', 'Plastic film', 'Six pack rings', 'Garbage bag', 'Other plastic wrapper', 'Single-use carrier bag', 'Polypropylene bag', 'Crisp packet', 'Spread tub', 'Tupperware', 'Disposable food container', 'Foam food container', 'Other plastic container', 'Plastic glooves', 'Plastic utensils', 'Pop tab', 'Rope & strings', 'Scrap metal', 'Shoe', 'Squeezable tube', 'Plastic straw', 'Paper straw', 'Styrofoam piece', 'Unlabeled litter', 'Cigarette']

mkdir(annotations_path)
labels = os.listdir(labels_path)


def write_xml(imgname, filepath, labeldicts, w, h):                     #参数imagename是图片名（无后缀）
    root = ET.Element('Annotation')                             #创建Annotation根节点
    ET.SubElement(root, 'filename').text = str(imgname)         #创建filename子节点（无后缀）
    sizes = ET.SubElement(root,'size')                          #创建size子节点
    ET.SubElement(sizes, 'width').text = str(w)                 #没带脑子直接写了原图片的尺寸......
    ET.SubElement(sizes, 'height').text = str(h)
    ET.SubElement(sizes, 'depth').text = '3'                    #图片的通道数：img.shape[2]
    for labeldict in labeldicts:
        objects = ET.SubElement(root, 'object')                 #创建object子节点
        ET.SubElement(objects, 'name').text = labeldict['name']        #BDD100K_10.names文件中
                                                                       #的类别名
        ET.SubElement(objects, 'pose').text = 'Unspecified'
        ET.SubElement(objects, 'truncated').text = '0'
        ET.SubElement(objects, 'difficult').text = '0'
        bndbox = ET.SubElement(objects,'bndbox')
        ET.SubElement(bndbox, 'xmin').text = str(int(labeldict['xmin']))
        ET.SubElement(bndbox, 'ymin').text = str(int(labeldict['ymin']))
        ET.SubElement(bndbox, 'xmax').text = str(int(labeldict['xmax']))
        ET.SubElement(bndbox, 'ymax').text = str(int(labeldict['ymax']))
    tree = ET.ElementTree(root)
    tree.write(filepath, encoding='utf-8')


for label in labels:                                           #批量读.txt文件
    with open(labels_path + label, 'r') as f:
        img_id = os.path.splitext(label)[0]
        contents = f.readlines()
        labeldicts = []
        img = np.array(Image.open(img_path + label.strip('.txt') + '.jpg'))
        sh, sw = img.shape[0], img.shape[1]
        for content in contents:
                         #img.shape[0]是图片的高度720
                                                               #img.shape[1]是图片的宽度720
            content = content.strip('\n').split()
            x=float(content[1])*sw
            y=float(content[2])*sh
            w=float(content[3])*sw
            h=float(content[4])*sh
            new_dict = {'name': classes[int(content[0])],
                        'difficult': '0',
                        'xmin': x-w/2,                      #坐标转换公式看另一篇文章....
                        'ymin': y-h/2,
                        'xmax': x+w/2,
                        'ymax': y+h/2
                        }
            labeldicts.append(new_dict)
        write_xml(img_id, annotations_path + label.strip('.txt') + '.xml', labeldicts, sw, sh)