对于一个语义分割数据集,可以使用如下方法(基于 OpenCV 与 NumPy 的 Python 脚本)统计样本集 ground truth(标注图)的类别分布情况:
import os

import cv2
import numpy as np

# Number of classes; valid label values are 0 .. CLASSES_NUM - 1.
CLASSES_NUM = 21


def findImages(dir, topdown=True):
    """Return every file found under ``dir``, as paths relative to ``dir``.

    The directory tree is traversed with ``os.walk``. Files located directly
    in ``dir`` are returned as plain file names; files in sub-directories
    keep their sub-directory prefix so that ``os.path.join(dir, entry)``
    always points at the real file.

    Args:
        dir: Root folder to scan.
        topdown: Forwarded to ``os.walk`` to select the traversal order.

    Returns:
        A list of relative file paths (no filtering by file type is done).

    Raises:
        IOError: If ``dir`` does not exist.
    """
    if not os.path.exists(dir):
        # A bare ``raise`` has no active exception to re-raise here, so raise
        # an explicit error that carries the message.
        raise IOError("Path for {} not exist!".format(dir))

    im_list = []
    for root, _dirs, files in os.walk(dir, topdown):
        for fl in files:
            im_list.append(os.path.relpath(os.path.join(root, fl), dir))
    return im_list


# Number of images in which each class appears.
images_count = [0] * CLASSES_NUM
# Number of pixels labelled with each class.
class_pixels_count = [0] * CLASSES_NUM
# Total pixel count of the images in which each class appears.
image_pixels_count = [0] * CLASSES_NUM

image_folder = '../data/gt'
im_list = findImages(image_folder)

for im in im_list:
    print(im)
    cv_img = cv2.imread(os.path.join(image_folder, im), cv2.IMREAD_UNCHANGED)
    if cv_img is None:
        # cv2.imread signals an unreadable / non-image file by returning None.
        print("Skip {}: cannot be read as an image".format(im))
        continue
    if cv_img.ndim != 2 or not np.issubdtype(cv_img.dtype, np.integer):
        # The statistics below index pixels as (row, col) integer labels.
        raise ValueError(
            "{} is not a single-channel integer label map "
            "(shape={}, dtype={})".format(im, cv_img.shape, cv_img.dtype))

    labels = cv_img.ravel()
    in_range = (labels >= 0) & (labels < CLASSES_NUM)

    # Report label values that fall outside the class range, once per
    # distinct value, so they can be checked by hand.
    for p_value in np.unique(labels[~in_range]):
        print(int(p_value))

    # Per-class pixel histogram for this image, computed in one vectorized
    # pass instead of a Python-level loop over every pixel.
    hist = np.bincount(labels[in_range].astype(np.intp),
                       minlength=CLASSES_NUM)

    im_size = cv_img.shape[0] * cv_img.shape[1]
    for n in range(CLASSES_NUM):
        n_pixels = int(hist[n])  # plain int keeps the printed lists clean
        if n_pixels:
            class_pixels_count[n] += n_pixels
            images_count[n] += 1
            image_pixels_count[n] += im_size

print(images_count)
print(class_pixels_count)
print(image_pixels_count)
上述代码主要统计了三组数据:每一类别所包含的图像数量(`images_count`)、每一类别的像素数目(`class_pixels_count`),以及每一类别对应的图像的总像素数目(`image_pixels_count`,即所有包含该类别的图像的像素数之和)。有了这三组统计结果,就可以进一步计算训练时每一类别的 loss 的 `class_weight`。