#!/usr/bin/python
# coding=utf-8
"""Common code for building custom deep-learning (DL) datasets.

Blog post by Chaowei. Published 2018-04-20 | updated 2019-01-07 |
category: DeepLearning | word count: 5.2k | reading time: about 5 minutes.

Topics announced by the post: txt-to-xml conversion, bulk renaming of
dataset labels, and dataset label troubleshooting. Only the first two have
code in this listing:

1. txt -> xml: build an XML annotation file for every KITTI label ``.txt``
   file, creating the XML from scratch as a DOM tree that follows a fixed
   schema (``generate_xml`` / ``convert_kitti_labels``).
2. Bulk label rename: rewrite one class name in every annotation file of a
   directory (``read_xml`` / ``relabel_annotations``).

The post published these as two separate scripts. When this file is run
directly, an optional first argument (``txt2xml`` or ``relabel``) selects
one of them; with no argument both run in the order of the post.
All directory paths below are placeholders and must be adjusted.
"""

import os
import sys
from xml.dom.minidom import Document
from xml.etree.ElementTree import Element, ElementTree, ParseError

# Object types copied from the KITTI label files; all other types are dropped.
KITTI_CLASSES = ('Pedestrian', 'Car', 'Truck', 'Van', 'Cyclist')
# Directory that holds the KITTI label .txt files.
KITTI_LABELS_DIR = os.path.join('.../training/', 'label')
# Directory that holds the matching .png images (adjust to your setup).
KITTI_IMAGE_DIR = '/home/chaowei/data/KITTI/Detection/Image/training/image_2'
# Directory that receives the generated .xml annotation files.
XML_OUTPUT_DIR = '.../Annotations/'

# Bounding-box tags in the order the values appear in a label line,
# starting at field index 4 (fields 4..7 are read, as in the original).
_BNDBOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')
_BNDBOX_FIRST_FIELD = 4

label_prefix = "path/Annotations"    # directory of the original annotations
label_prefix2 = "path/Annotations2"  # directory for the modified annotations


def _append_element(doc, parent, tag, text=None):
    """Create ``<tag>``, optionally holding *text*, and append it to *parent*.

    Returns the new element so that children can be attached to it.
    """
    node = doc.createElement(tag)
    if text is not None:
        node.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(node)
    return node


def _ensure_dir(path):
    """Create directory *path* (and parents) when it does not exist yet."""
    if path and not os.path.isdir(path):
        os.makedirs(path)


def generate_xml(name, split_lines, img_size, class_ind,
                 out_dir=XML_OUTPUT_DIR):
    """Write ``<out_dir>/<name>.xml`` describing the objects of one image.

    Args:
        name: file stem shared by the label file, the image and the XML.
        split_lines: raw lines of the label file. The first field of a line
            is the object type; fields 4-7 are written as xmin, ymin, xmax
            and ymax after truncation to integers.
        img_size: ``(height, width[, depth])`` as given by an image array's
            ``shape``. A missing depth is written as 1.
        class_ind: collection of object types to keep.
        out_dir: output directory; created when missing.

    Returns:
        The path of the XML file that was written.

    Raises:
        IndexError: a kept line has fewer than 8 fields.
        ValueError: a bounding-box field is not a number.
    """
    doc = Document()  # create the DOM document object
    annotation = _append_element(doc, doc, 'annotation')
    _append_element(doc, annotation, 'folder', 'KITTI')
    _append_element(doc, annotation, 'filename', name + '.png')

    source = _append_element(doc, annotation, 'source')
    _append_element(doc, source, 'database', 'The KITTI Database')
    _append_element(doc, source, 'annotation', 'KITTI')

    size = _append_element(doc, annotation, 'size')
    _append_element(doc, size, 'width', img_size[1])
    _append_element(doc, size, 'height', img_size[0])
    # A 2-D shape has no channel axis; record a single channel instead of
    # failing with IndexError.
    depth = img_size[2] if len(img_size) > 2 else 1
    _append_element(doc, size, 'depth', depth)

    for split_line in split_lines:
        fields = split_line.strip().split()
        # Blank lines have no fields; skip them together with unwanted types.
        if not fields or fields[0] not in class_ind:
            continue
        obj_node = _append_element(doc, annotation, 'object')
        _append_element(doc, obj_node, 'name', fields[0])
        bndbox = _append_element(doc, obj_node, 'bndbox')
        for offset, tag in enumerate(_BNDBOX_TAGS, _BNDBOX_FIRST_FIELD):
            _append_element(doc, bndbox, tag, int(float(fields[offset])))

    # Write the DOM object to disk.
    _ensure_dir(out_dir)
    xml_path = os.path.join(out_dir, name + '.xml')
    with open(xml_path, 'w') as xml_file:
        xml_file.write(doc.toprettyxml(indent=''))
    return xml_path


def convert_kitti_labels(labels_dir=KITTI_LABELS_DIR,
                         image_dir=KITTI_IMAGE_DIR,
                         class_ind=KITTI_CLASSES,
                         out_dir=XML_OUTPUT_DIR):
    """Convert every ``.txt`` label file under *labels_dir* to an XML file.

    The image ``<image_dir>/<stem>.png`` is loaded only to obtain its shape.
    Files without a ``.txt`` extension are ignored, and labels whose image
    cannot be read are reported and skipped.

    Returns:
        The number of XML files written.
    """
    # Imported here so the XML helpers stay usable without OpenCV installed.
    import cv2

    converted = 0
    # os.walk yields the current directory, its sub-directories and files.
    for parent, _dirnames, filenames in os.walk(labels_dir):
        for file_name in filenames:
            name, ext = os.path.splitext(file_name)
            if ext.lower() != '.txt':
                continue
            full_path = os.path.join(parent, file_name)
            print(full_path)
            with open(full_path) as label_file:
                split_lines = label_file.readlines()
            img_path = os.path.join(image_dir, name + '.png')
            image = cv2.imread(img_path)
            if image is None:
                # imread signals failure by returning None, not by raising.
                print('skipping %s: cannot read image %s'
                      % (full_path, img_path))
                continue
            generate_xml(name, split_lines, image.shape, class_ind, out_dir)
            converted += 1
    return converted


def read_xml(in_path):
    """Read and parse the XML file at *in_path*.

    Returns:
        The parsed ``ElementTree``, or ``None`` when the file cannot be
        opened or is not well-formed XML (for example an empty file), so
        that callers can simply skip it.
    """
    tree = ElementTree()
    try:
        tree.parse(in_path)
    except (ParseError, IOError, OSError):
        return None
    return tree


def relabel_annotations(src_dir=label_prefix, dst_dir=label_prefix2,
                        old_label='person', new_label='Person'):
    """Copy annotations from *src_dir* to *dst_dir*, renaming one label.

    Every ``<object>`` whose ``<name>`` equals *old_label* gets *new_label*.
    Each parsable file is written to *dst_dir* whether or not it changed;
    unparsable entries are skipped. The file name is printed once per
    renamed object.

    Returns:
        ``(frame_cnt, renamed)``: the number of files written and the total
        number of objects renamed.
    """
    annolist = os.listdir(src_dir)
    _ensure_dir(dst_dir)

    frame_cnt = 0
    renamed = 0
    for anno in annolist:
        tree = read_xml(os.path.join(src_dir, anno))
        if tree is None:
            continue
        frame_cnt += 1
        for obj in tree.findall('object'):
            name_node = obj.find('name')
            # Objects without a <name> child cannot match; leave them alone.
            if name_node is None or name_node.text != old_label:
                continue
            print(anno)
            name_node.text = new_label
            renamed += 1
        # Write the modified tree to the output directory.
        tree.write(os.path.join(dst_dir, anno),
                   encoding="utf-8", xml_declaration=True)
    return frame_cnt, renamed


if __name__ == '__main__':
    task = sys.argv[1] if len(sys.argv) > 1 else 'all'
    if task not in ('txt2xml', 'relabel', 'all'):
        sys.exit('usage: %s [txt2xml|relabel|all]' % sys.argv[0])
    if task in ('txt2xml', 'all'):
        convert_kitti_labels()
        print('all txts has converted into xmls')
    if task in ('relabel', 'all'):
        relabel_annotations()

# Related posts:
#   - Neural Network Basics - by Andrew Ng
#   - Deep Neural Networks - by Andrew Ng
#   - Python interface wrapper for Caffe-SSD
#   - Evaluation metrics for object detection explained
#   - Notes on Caffe parameters
# Author: Chaowei
# Post link: http://huchaowei.com/2018/04/20/DL_DatasetMaking-Code/
# Copyright: unless stated otherwise, all posts on this blog are licensed
# under BY-NC-SA. Please credit the source when reposting.
# ------------- End of post. Thank you for reading. -------------