103 lines
5.5 KiB
Python
103 lines
5.5 KiB
Python
import json
|
|
import unittest
|
|
from os.path import abspath, dirname
|
|
|
|
from roboflow.util import folderparser
|
|
|
|
thisdir = dirname(abspath(__file__))
|
|
|
|
|
|
class TestFolderParser(unittest.TestCase):
|
|
def test_parse_chess(self):
|
|
chessfolder = f"{thisdir}/../datasets/chess"
|
|
parsed = folderparser.parsefolder(chessfolder)
|
|
_assertJsonMatchesFile(parsed, f"{thisdir}/../output/parse-chess.json")
|
|
|
|
def test_parse_sharks_coco(self):
|
|
sharksfolder = f"{thisdir}/../datasets/sharks-tiny-coco"
|
|
parsed = folderparser.parsefolder(sharksfolder)
|
|
testImagePath = "/train/sharks_mp4-20_jpg.rf.90ba2e8e9ca0613f71359efb7ed48b26.jpg"
|
|
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
|
|
assert len(json.loads(testImage["annotationfile"]["rawText"])["annotations"]) == 5
|
|
|
|
def test_parse_sharks_createml(self):
|
|
sharksfolder = f"{thisdir}/../datasets/sharks-tiny-createml"
|
|
parsed = folderparser.parsefolder(sharksfolder)
|
|
testImagePath = "/train/sharks_mp4-20_jpg.rf.5359121123e86e016401ea2731e47949.jpg"
|
|
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
|
|
imgParsedAnnotations = json.loads(testImage["annotationfile"]["rawText"])
|
|
assert len(imgParsedAnnotations) == 1
|
|
imgReference = imgParsedAnnotations[0]
|
|
assert len(imgReference["annotations"]) == 5
|
|
|
|
def test_parse_sharks_yolov9(self):
|
|
def test(sharksfolder):
|
|
parsed = folderparser.parsefolder(sharksfolder)
|
|
testImagePath = "/train/images/sharks_mp4-20_jpg.rf.5359121123e86e016401ea2731e47949.jpg"
|
|
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
|
|
expectAnnotationFile = "/train/labels/sharks_mp4-20_jpg.rf.5359121123e86e016401ea2731e47949.txt"
|
|
assert testImage["annotationfile"]["file"] == expectAnnotationFile
|
|
assert testImage["annotationfile"]["labelmap"] == {0: "fish", 1: "primary", 2: "shark"}
|
|
|
|
test(f"{thisdir}/../datasets/sharks-tiny-yolov9")
|
|
test(f"{thisdir}/../datasets/sharks-tiny-yolov9/") # this was a bug once, can you believe it?
|
|
|
|
def test_parse_mosquitos_csv(self):
|
|
folder = f"{thisdir}/../datasets/mosquitos"
|
|
parsed = folderparser.parsefolder(folder)
|
|
testImagePath = "/train_10308.jpeg"
|
|
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
|
|
assert testImage["annotationfile"]["name"] == "annotation.csv"
|
|
expected = "img_fName,img_w,img_h,class_label,bbx_xtl,bbx_ytl,bbx_xbr,bbx_ybr\n"
|
|
expected += "train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815\n"
|
|
assert testImage["annotationfile"]["rawText"] == expected
|
|
|
|
def test_paligemma_format(self):
|
|
folder = f"{thisdir}/../datasets/paligemma"
|
|
parsed = folderparser.parsefolder(folder)
|
|
testImagePath = "/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg"
|
|
testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
|
|
assert testImage["annotationfile"]["name"] == "annotation.jsonl"
|
|
expected = (
|
|
'{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",'
|
|
' "prefix": "Which sector had the highest ROI in 2013?", "suffix": "Retail"}\n'
|
|
'{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",'
|
|
' "prefix": "Which sector had the highest ROI in 2014?", "suffix": "Electronics"}'
|
|
)
|
|
assert testImage["annotationfile"]["rawText"] == expected
|
|
|
|
def test_parse_classification_folder_structure(self):
|
|
classification_folder = f"{thisdir}/../datasets/corrosion-singlelabel-classification"
|
|
parsed = folderparser.parsefolder(classification_folder, is_classification=False)
|
|
for img in parsed["images"]:
|
|
self.assertIsNone(img.get("annotationfile"))
|
|
|
|
parsed_classification = folderparser.parsefolder(classification_folder, is_classification=True)
|
|
corrosion_images = [i for i in parsed_classification["images"] if "Corrosion" in i["dirname"]]
|
|
self.assertTrue(len(corrosion_images) > 0)
|
|
for img in corrosion_images:
|
|
self.assertIsNotNone(img.get("annotationfile"))
|
|
self.assertEqual(img["annotationfile"]["type"], "classification_folder")
|
|
self.assertEqual(img["annotationfile"]["classification_label"], "Corrosion")
|
|
no_corrosion_images = [i for i in parsed_classification["images"] if "no-corrosion" in i["dirname"]]
|
|
self.assertTrue(len(no_corrosion_images) > 0)
|
|
for img in no_corrosion_images:
|
|
self.assertIsNotNone(img.get("annotationfile"))
|
|
self.assertEqual(img["annotationfile"]["type"], "classification_folder")
|
|
self.assertEqual(img["annotationfile"]["classification_label"], "no-corrosion")
|
|
|
|
def test_parse_multilabel_classification_csv(self):
|
|
folder = f"{thisdir}/../datasets/skinproblem-multilabel-classification"
|
|
parsed = folderparser.parsefolder(folder, is_classification=True)
|
|
images = {img["name"]: img for img in parsed["images"]}
|
|
img1 = images.get("101_jpg.rf.ffb91e580c891eb04b715545274b2469.jpg")
|
|
self.assertIsNotNone(img1)
|
|
self.assertEqual(img1["annotationfile"]["type"], "classification_multilabel")
|
|
self.assertEqual(set(img1["annotationfile"]["labels"]), {"Blackheads"})
|
|
|
|
|
|
def _assertJsonMatchesFile(actual, filename):
|
|
with open(filename) as file:
|
|
expected = json.load(file)
|
|
return actual == expected
|