目录
 
 
 
 
文章目录
 
  
 
 
学习笔记——一些数据转换脚本(Python)
 
 
json2YOLO(txt)
 
- 自定义 name2id
- 自定义 json_floder_path、txt_outer_path
- 保证存放 txt 的文件夹存在
 
 
import json
import os
# Maps annotation label names to YOLO class ids; edit to match your dataset.
name2id = {'crack_concrete': 4}


def decode_json(json_floder_path, txt_outer_path, json_name):
    """Convert one annotation JSON file into a YOLO segmentation label file.

    Every shape whose ``shape_type`` is ``'polygon'`` becomes one output
    line: the class id looked up in ``name2id`` followed by the polygon's
    x/y coordinates divided by the image width/height.  Shapes of any other
    type are skipped.  The label file is rewritten on each call, so running
    the conversion twice does not duplicate annotations.

    Args:
        json_floder_path: Folder that contains ``json_name``.
        txt_outer_path: Existing folder that receives the ``.txt`` file.
        json_name: File name of the annotation, e.g. ``"img1.json"``.

    Raises:
        KeyError: If the JSON lacks an expected key or a polygon label is
            not present in ``name2id``.
        ZeroDivisionError: If the recorded image width or height is 0.
    """
    json_path = os.path.join(json_floder_path, json_name)
    # Encoding settings are kept from the original script; the context
    # manager makes sure the handle is released.
    with open(json_path, 'r', encoding='gb2312', errors='ignore') as src:
        data = json.load(src)

    shapes = data['shapes']
    if shapes:
        # Progress/debug output: type of the first shape in the file.
        print(shapes[0]['shape_type'])

    dw = 1. / data['imageWidth']
    dh = 1. / data['imageHeight']

    lines = []
    for shape in shapes:
        if shape['shape_type'] != 'polygon':
            continue
        coords = []
        for point in shape['points']:
            coords.append(float(point[0]) * dw)
            coords.append(float(point[1]) * dh)
        lines.append(
            str(name2id[shape['label']]) + " "
            + " ".join(str(value) for value in coords) + '\n'
        )

    # splitext strips the real extension instead of assuming it is 5 chars.
    stem = os.path.splitext(json_name)[0]
    txt_name = os.path.join(txt_outer_path, stem + '.txt')
    # 'w' (not 'a'): a stale label file from an earlier run must be replaced.
    with open(txt_name, 'w') as f:
        f.writelines(lines)
if __name__ == "__main__":
    # Folder holding the source annotation JSON files.
    json_floder_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\jsons'
    # Folder that receives one YOLO .txt label file per JSON file.
    txt_outer_path = r'D:\JetBrains\PyCharm 2023.2\PycharmProjects\YOLOv8\datasets\crack_concrete_hybrid_augmentation\labels'
    # Create the label folder up front so a missing directory cannot abort
    # the run part-way through.
    os.makedirs(txt_outer_path, exist_ok=True)
    # Only annotation files are converted; any other file in the folder
    # would fail to parse as JSON.
    json_names = [
        name for name in os.listdir(json_floder_path)
        if name.lower().endswith('.json')
    ]
    for json_name in json_names:
        decode_json(json_floder_path, txt_outer_path, json_name)
    print('-----------转化完毕------------')
 
 
 
 
VOC(xml)2YOLO(txt)
 
- 自定义 class_mapping
- 自定义 input_folder、output_folder
- 不必确保 output_folder 文件夹存在,程序会自动创建
 
import xml.etree.ElementTree as ET
import os
# Maps VOC object names to YOLO class ids.  Objects whose name is not listed
# here are left out of the generated label files.
class_mapping = {
    "person": 0,
    "helmet": 1,
    "life jacket": 2,
    "truck": 3,
    "excavator": 4,
    "car crane": 5,
    "crawler crane": 6,
    "rotary drill rig": 7,
    "concrete tanker": 8,
}
# Folder with the VOC .xml annotations and folder for the YOLO .txt labels.
input_folder = "../datasets/drone/labels/convert"
output_folder = "../datasets/drone/labels/train"

# open() does not create missing directories, so make sure the target exists
# before the first label file is written.
os.makedirs(output_folder, exist_ok=True)

for xml_file in os.listdir(input_folder):
    if not xml_file.endswith(".xml"):
        continue

    xml_path = os.path.join(input_folder, xml_file)
    root = ET.parse(xml_path).getroot()

    # Image size used to normalise the pixel coordinates.
    width = int(root.find("size/width").text)
    height = int(root.find("size/height").text)

    txt_file = os.path.splitext(xml_file)[0] + ".txt"
    txt_path = os.path.join(output_folder, txt_file)

    with open(txt_path, "w") as f:
        for obj in root.findall("object"):
            class_id = class_mapping.get(obj.find("name").text)
            if class_id is None:
                continue

            # Parse each corner once, then derive centre and size from them.
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)

            x_center = (xmin + xmax) / 2.0 / width
            y_center = (ymin + ymax) / 2.0 / height
            width_norm = (xmax - xmin) / width
            height_norm = (ymax - ymin) / height
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}\n")
 
 
 
 
image2h5
 
- 自定义 image_folder、h5_folder
- 不必确保 h5_folder 文件夹存在,程序会自动创建
 
import h5py
from PIL import Image
import numpy as np
import os
def image_to_h5(image_folder, h5_folder):
    """Convert every ``.png``/``.jpg`` image in a folder into an HDF5 file.

    Each image is opened with PIL, converted to a NumPy array and stored
    under the dataset name ``'image'`` in ``<h5_folder>/<image stem>.h5``.
    Files with any other extension are ignored.

    Args:
        image_folder: Folder containing the source images.
        h5_folder: Destination folder; created if it does not exist.
    """
    os.makedirs(h5_folder, exist_ok=True)

    # Select the images first so the progress counter reflects images only.
    image_names = [
        name for name in os.listdir(image_folder)
        if name.endswith(('.png', '.jpg'))
    ]
    total_files = len(image_names)

    for i, file_name in enumerate(image_names, 1):
        input_image_path = os.path.join(image_folder, file_name)
        # splitext handles both extensions; replacing only '.png' used to
        # leave '.jpg' inputs with a '.jpg' output name.
        # NOTE: 'a.png' and 'a.jpg' in the same folder map to the same 'a.h5'.
        stem = os.path.splitext(file_name)[0]
        output_h5_path = os.path.join(h5_folder, stem + '.h5')

        # Copy the pixels into an array, then release the image file.
        with Image.open(input_image_path) as image:
            image_array = np.array(image)

        with h5py.File(output_h5_path, 'w') as hf:
            hf.create_dataset('image', data=image_array)

        print(f"Processed {i}/{total_files} images. Current image: {file_name}")
if __name__ == "__main__":
    # Source image directory and destination directory for the .h5 files.
    image_folder, h5_folder = '../data/crack_concrete/images', '../data/crack_concrete/h5s'
    image_to_h5(image_folder=image_folder, h5_folder=h5_folder)
 
 
 
 
json2npz
 
- 自定义 json_folder、npz_folder
- 不必确保 npz_folder 文件夹存在,程序会自动创建
 
import os
import json
import numpy as np
from PIL import Image, ImageDraw
def load_json(json_path):
    """Read the file at ``json_path`` and return its parsed JSON content."""
    with open(json_path, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def create_image(data):
    """Load the image referenced by an annotation record as a float32 array.

    Args:
        data: Parsed annotation dict; ``data['imagePath']`` is handed to
            ``Image.open`` unchanged, so a relative path is resolved against
            the current working directory.

    Returns:
        The image pixels as a NumPy array with dtype ``float32``.
    """
    # The array is built inside the ``with`` so the pixel data is copied out
    # before the image file is released.
    with Image.open(data['imagePath']) as image:
        return np.array(image, dtype=np.float32)
def create_label(data):
    """Rasterize the annotated shapes into a binary mask sized like the image.

    A single-channel mask of ``data['imageWidth']`` x ``data['imageHeight']``
    is created filled with 0, then the outline of every entry in
    ``data['shapes']`` is filled with 1.

    Args:
        data: Parsed annotation dict providing ``imageWidth``,
            ``imageHeight`` and ``shapes``; each shape holds a ``points``
            list of x/y pairs.

    Returns:
        The mask as a NumPy array with dtype ``float32`` (1.0 inside the
        drawn shapes, 0.0 elsewhere).
    """
    mask = Image.new('L', (data['imageWidth'], data['imageHeight']), 0)
    draw = ImageDraw.Draw(mask)
    # Draw every annotated shape rather than only the first, so records with
    # several regions yield a complete mask; no shapes leaves the mask blank.
    for shape in data['shapes']:
        outline = [(float(point[0]), float(point[1])) for point in shape['points']]
        if len(outline) < 2:
            # Too few vertices to outline anything; skip this shape instead
            # of failing the whole record.
            continue
        draw.polygon(outline, fill=1)
    return np.array(mask, dtype=np.float32)
def json_to_npz(json_folder, npz_folder):
    """Convert every ``.json`` annotation in a folder into an ``.npz`` file.

    For each annotation the image array (``create_image``) and the mask
    array (``create_label``) are saved together under the keys ``image``
    and ``label`` in ``<npz_folder>/<json stem>.npz``.

    Args:
        json_folder: Folder containing the annotation ``.json`` files.
        npz_folder: Destination folder; created if it does not exist.
    """
    os.makedirs(npz_folder, exist_ok=True)

    json_files = [name for name in os.listdir(json_folder) if name.endswith('.json')]
    total_files = len(json_files)

    for i, json_file in enumerate(json_files, 1):
        # Build the path from the folder argument; joining onto the
        # not-yet-assigned local ``json_path`` raised UnboundLocalError.
        json_path = os.path.join(json_folder, json_file)
        json_data = load_json(json_path)

        image_array = create_image(json_data)
        label_array = create_label(json_data)

        # splitext drops only the extension, so dotted names such as
        # 'img.v2.json' keep their full stem.
        stem = os.path.splitext(json_file)[0]
        npz_file_path = os.path.join(npz_folder, stem + '.npz')
        np.savez(npz_file_path, image=image_array, label=label_array)

        print(f"Processed {i}/{total_files} JSON files. Current file: {json_file}")
if __name__ == "__main__":
    # Annotation source directory and destination directory for the .npz files.
    json_folder, npz_folder = '../data/crack_concrete/jsons', '../data/crack_concrete/npzs'
    json_to_npz(json_folder=json_folder, npz_folder=npz_folder)