
Commit d34e78d

Author: nauyan (committed)
adding multiple dataset support, updating gitignore

1 parent 12790f9 commit d34e78d

15 files changed: +555 -121 lines

.gitignore (+3 -1)

@@ -3,4 +3,6 @@ notebooks
 .DS_Store
 checkpoints
 run_logs
-data
+data
+.ipynb_checkpoints
+temp.ipynb

config.py (+15 -6)

@@ -1,21 +1,30 @@
 version = 1.0

-dataset_name = "consep"
-dataset_raw = "data/archives/CoNSeP/"
-train_dir = "data/consep/train/"
-test_dir = "data/consep/test/"
+# dataset_name = "consep"
+# dataset_raw = "data/archives/CoNSeP/"
+# train_dir = "data/consep/train/"
+# test_dir = "data/consep/test/"

+# Combined Dataset CoNSep, PanNuke, Lizard
+dataset_name = "all"
+dataset_raw = None
+train_dir = "data/all/train/"
+test_dir = "data/all/test/"

 checkpoints_dir = "checkpoints"
 tensorboard_logs = "run_logs"

 encoder = "tu-tf_efficientnet_b5_ns"
 encoder_weights = None

-inference_weights = "NC-Net_consep_metric.pth"
+# CoNSep Weights
+# inference_weights = "NC-Net_consep_metric.pth"
+
+# All Weights
+inference_weights = "NC-Net_all_metric.pth"

 device = "cuda"
 batch_size = 16
 epochs = 1000
 learning_rate = 0.01
-watershed_threshold = 0.2
+watershed_threshold = 0.2
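For orientation, the combined-dataset settings above are consumed by the patch scripts added in this commit, which write 256x256x7 .npy patches into train_dir/test_dir. A minimal, hypothetical usage sketch (not part of the commit) showing how one such patch breaks down into the channel layout the create_dataset scripts use:

# Hypothetical sketch: load one combined-dataset patch and split its channels.
import glob
import numpy as np
import config

patch_paths = glob.glob("{}/*.npy".format(config.train_dir))  # data/all/train/
patch = np.load(patch_paths[0])                               # (256, 256, 7)
image = patch[:, :, :3]        # RGB, already scaled to [0, 1]
inst_map = patch[:, :, 3]      # instance mask
nuclear_map = patch[:, :, 4]   # binary nuclei mask
centroid_map = patch[:, :, 5]  # dilated centroid mask
type_map = patch[:, :, 6]      # nucleus type mask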

create_dataset.py (+10 -19)

@@ -48,7 +48,6 @@ def read_annotation(annotPath):
     return centroid_map, inst_map, nuclear_map, type_map


-
 test_folder = glob.glob(os.path.join(config.dataset_raw, "Test/Images"))[0]
 train_folder = glob.glob(os.path.join(config.dataset_raw, "Train/Images"))[0]

@@ -70,8 +69,7 @@ def read_annotation(annotPath):
     print(img_path)
     image = read_image(img_path)
     centroid_map, inst_map, nuclear_map, type_map = read_annotation(
-        img_path.replace("Images", "Labels").replace(".png", ".mat")
-    )
+        img_path.replace("Images", "Labels").replace(".png", ".mat"))

     output_mask = np.zeros((image.shape[0], image.shape[1], 7))
     output_mask[:, :, :3] = image / 255.0
@@ -80,17 +78,14 @@ def read_annotation(annotPath):
     output_mask[:, :, 5] = centroid_map  # centroids_mask
     output_mask[:, :, 6] = type_map  # type mask

-    output = util.view_as_windows(
-        output_mask, (256, 256, 7), step=(245, 245, 7)
-    ).reshape(
-        -1, 256, 256, 7
-    )
+    output = util.view_as_windows(output_mask, (256, 256, 7),
+                                  step=(245, 245, 7)).reshape(-1, 256, 256, 7)

     for idx in range(output.shape[0]):
         np.save(
             "{}/{}_{}.npy".format(
-                config.test_dir, os.path.basename(img_path).replace(".png", ""), idx
-            ),
+                config.test_dir,
+                os.path.basename(img_path).replace(".png", ""), idx),
             output[idx],
         )

@@ -99,8 +94,7 @@ def read_annotation(annotPath):

     image = read_image(img_path)
     centroid_map, inst_map, nuclear_map, type_map = read_annotation(
-        img_path.replace("Images", "Labels").replace(".png", ".mat")
-    )
+        img_path.replace("Images", "Labels").replace(".png", ".mat"))

     output_mask = np.zeros((image.shape[0], image.shape[1], 7))
     output_mask[:, :, :3] = image / 255.0
@@ -109,16 +103,13 @@ def read_annotation(annotPath):
     output_mask[:, :, 5] = centroid_map  # centroids_mask
     output_mask[:, :, 6] = type_map  # type mask

-    output = util.view_as_windows(
-        output_mask, (256, 256, 7), step=(128, 128, 7)
-    ).reshape(
-        -1, 256, 256, 7
-    )
+    output = util.view_as_windows(output_mask, (256, 256, 7),
+                                  step=(128, 128, 7)).reshape(-1, 256, 256, 7)

     for idx in range(output.shape[0]):
         np.save(
             "{}/{}_{}.npy".format(
-                config.train_dir, os.path.basename(img_path).replace(".png", ""), idx
-            ),
+                config.train_dir,
+                os.path.basename(img_path).replace(".png", ""), idx),
             output[idx],
         )
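The reformatting above does not change the tiling behaviour: view_as_windows still slides a 256x256x7 window across the stacked mask, with a 245-pixel step for test images and a denser, overlapping 128-pixel step for training images. An illustrative sketch with a hypothetical 1000x1000 input (the input size is made up; it is not part of the commit):

# Toy example of the patch extraction used above; window/step values match
# the diff, the 1000x1000 input size is assumed for illustration only.
import numpy as np
from skimage import util

output_mask = np.zeros((1000, 1000, 7))

test_patches = util.view_as_windows(output_mask, (256, 256, 7),
                                    step=(245, 245, 7)).reshape(-1, 256, 256, 7)
train_patches = util.view_as_windows(output_mask, (256, 256, 7),
                                     step=(128, 128, 7)).reshape(-1, 256, 256, 7)

# floor((1000 - 256) / 245) + 1 = 4 windows per axis -> 16 test patches,
# floor((1000 - 256) / 128) + 1 = 6 windows per axis -> 36 train patches.
print(test_patches.shape)   # (16, 256, 256, 7)
print(train_patches.shape)  # (36, 256, 256, 7)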

create_dataset_all.py (new file, +21 lines)

import shutil
import os

from tqdm import tqdm
from glob import glob
from sklearn.model_selection import train_test_split

samples = glob("data/consep/*/*.npy")
samples.extend(glob("data/liz1/*/*.npy"))
samples.extend(glob("data/pan1/*/*.npy"))

X_train, X_test, y_train, y_test = train_test_split(samples,
                                                    samples,
                                                    test_size=0.1,
                                                    random_state=42)

for sample in tqdm(X_train, total=len(X_train)):
    shutil.copyfile(sample, f"data/all/train/{os.path.basename(sample)}")

for sample in tqdm(X_test, total=len(X_test)):
    shutil.copyfile(sample, f"data/all/test/{os.path.basename(sample)}")

create_dataset_consep.py (new file, +94 lines)

from skimage import data, io, util
from matplotlib import pyplot as plt

import scipy.io as sio
import numpy as np

import glob
import cv2
import os
import shutil
import config


def create_centroid_mask(centroids, height, width):
    cent_img = np.zeros((height, width))
    cent_img[tuple(centroids.T)] = 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    centroids_mask = cv2.dilate(cent_img, kernel, iterations=2)

    return centroids_mask


def read_image(imagePath):
    image = io.imread(imagePath)[:, :, :3]

    return image


def fix_type(type_map):
    type_map[(type_map == 3) | (type_map == 4)] = 3
    type_map[(type_map == 5) | (type_map == 6) | (type_map == 7)] = 4
    return type_map


def read_annotation(annotPath):
    mask = sio.loadmat(annotPath)

    type_map = fix_type(mask["type_map"])
    centroid_map = create_centroid_mask(
        mask["inst_centroid"].astype(int)[:, ::-1],
        mask["inst_map"].shape[0],
        mask["inst_map"].shape[1],
    )
    inst_map = mask["inst_map"]
    nuclear_map = inst_map + 0
    nuclear_map[nuclear_map != 0] = 1

    return centroid_map, inst_map, nuclear_map, type_map


def init_dir(input_dir):
    if not os.path.exists(input_dir):
        os.makedirs(input_dir)
    else:
        shutil.rmtree(input_dir)
        os.makedirs(input_dir)


def process_subset(input_dir, output_dir, image_size=256, step_size=245):

    for img_path in glob.glob("{}/*".format(input_dir)):
        print(img_path)
        image = read_image(img_path)
        centroid_map, inst_map, nuclear_map, type_map = read_annotation(
            img_path.replace("Images", "Labels").replace(".png", ".mat"))

        output_mask = np.zeros((image.shape[0], image.shape[1], 7))
        output_mask[:, :, :3] = image / 255.0
        output_mask[:, :, 3] = inst_map  # inst_mask
        output_mask[:, :, 4] = nuclear_map  # binary mask
        output_mask[:, :, 5] = centroid_map  # centroids_mask
        output_mask[:, :, 6] = type_map  # type mask

        output = util.view_as_windows(output_mask, (image_size, image_size, 7),
                                      step=(step_size, step_size, 7)).reshape(
                                          -1, image_size, image_size, 7)

        for idx in range(output.shape[0]):
            np.save(
                "{}/{}_{}.npy".format(
                    output_dir,
                    os.path.basename(img_path).replace(".png", ""), idx),
                output[idx],
            )


test_folder = glob.glob(os.path.join(config.dataset_raw, "Test/Images"))[0]
train_folder = glob.glob(os.path.join(config.dataset_raw, "Train/Images"))[0]

init_dir(config.train_dir)
init_dir(config.test_dir)

process_subset(train_folder, config.train_dir)
process_subset(test_folder, config.test_dir)
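A hedged usage note: the older create_dataset.py tiled training images with a denser 128-pixel step, while this script calls process_subset with its 245-pixel default for both subsets. If overlapping training patches are still wanted, the parameterised signature admits calls such as the following (hypothetical, not in this commit):

# Hypothetical calls: denser, overlapping training patches as in
# create_dataset.py, keeping the sparse step for the test subset.
process_subset(train_folder, config.train_dir, image_size=256, step_size=128)
process_subset(test_folder, config.test_dir, image_size=256, step_size=245)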

create_dataset_lizard.py (new file, +104 lines)

import glob
from skimage import io
import scipy.io as sio
from skimage import data, io, util
from matplotlib import pyplot as plt
import numpy as np

import cv2
import os
import random
from skimage.transform import rescale, resize, downscale_local_mean
from tqdm import tqdm
import shutil
import pandas as pd


def create_centroid_mask(centroids, height, width):
    cent_img = np.zeros((height, width))
    cent_img[tuple(centroids.T)] = 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    centroids_mask = cv2.dilate(cent_img, kernel, iterations=2)

    return centroids_mask


def rescale_sample(sample):
    sample = rescale(sample,
                     2,
                     anti_aliasing=False,
                     multichannel=True,
                     preserve_range=True,
                     order=0)
    return sample


def read_image(imagePath):
    image = io.imread(imagePath)[:, :, :3]
    return image


def read_annotation(annotPath):
    mask = sio.loadmat(annotPath)

    centroid_map = create_centroid_mask(mask['centroid'].astype(int)[:, ::-1],
                                        mask['inst_map'].shape[0],
                                        mask['inst_map'].shape[1])
    inst_map = mask['inst_map']
    nuclear_map = inst_map + 0
    nuclear_map[nuclear_map != 0] = 1

    return centroid_map, inst_map, nuclear_map


def init_dir(input_dir):
    if not os.path.exists(input_dir):
        os.makedirs(input_dir)
    else:
        shutil.rmtree(input_dir)
        os.makedirs(input_dir)


meta = pd.read_csv("../datasets/lizard/Lizard_Labels/info.csv")
# print(meta.columns)
for fold_idx in range(1, 4):
    print("Generating Fold", fold_idx)

    train_dir = "data/liz{}/train".format(fold_idx)
    test_dir = "data/liz{}/test".format(fold_idx)

    init_dir(train_dir)
    init_dir(test_dir)

    for idx in tqdm(range(meta.shape[0]), total=meta.shape[0]):

        image = read_image(
            glob.glob("../datasets/lizard/Lizard_Images*/{}.png".format(
                meta['Filename'][idx]))[0])

        centroid_map, inst_map, nuclear_map = read_annotation(
            "../datasets/lizard/Lizard_Labels/Labels/{}.mat".format(
                meta['Filename'][idx]))
        type_map = np.zeros((image.shape[0], image.shape[1]))

        output_mask = np.zeros((image.shape[0], image.shape[1], 7))
        output_mask[:, :, :3] = image / 255.0
        output_mask[:, :, 3] = inst_map  # inst_mask
        output_mask[:, :, 4] = nuclear_map  # binary mask
        output_mask[:, :, 5] = centroid_map  # centroids_mask
        output_mask[:, :, 6] = type_map  # type mask

        output_mask = rescale_sample(output_mask)
        output = util.view_as_windows(output_mask, (256, 256, 6),
                                      step=(245, 245,
                                            6)).reshape(-1, 256, 256, 6)

        for p_idx in range(output.shape[0]):
            if fold_idx == meta['Split'][idx]:
                np.save("{}/{}_{}.npy".format(test_dir, meta['Filename'][idx],
                                              p_idx),
                        output[p_idx])  # .astype('int32')
            else:
                np.save("{}/{}_{}.npy".format(train_dir, meta['Filename'][idx],
                                              p_idx),
                        output[p_idx])  # .astype('int32')
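The fold handling above follows the Lizard metadata: info.csv assigns each image a Split value (1-3 here), and for each fold_idx the images whose Split matches go into that fold's test set while the rest land in its train set, so every image appears as test data in exactly one of data/liz1, data/liz2, data/liz3. A toy sketch of that assignment (made-up filenames, not part of the commit):

# Toy illustration of the Split/fold_idx assignment used in the loop above.
import pandas as pd

meta = pd.DataFrame({"Filename": ["a", "b", "c"], "Split": [1, 2, 3]})
for fold_idx in range(1, 4):
    test_files = meta.loc[meta["Split"] == fold_idx, "Filename"].tolist()
    train_files = meta.loc[meta["Split"] != fold_idx, "Filename"].tolist()
    print(fold_idx, test_files, train_files)
# 1 ['a'] ['b', 'c']
# 2 ['b'] ['a', 'c']
# 3 ['c'] ['a', 'b']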
