# Prediction utilities
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import torch
from torchvision.ops import nms
from deepforest import preprocess
from deepforest import visualize
from skimage import io
def predict_image(model, image, return_plot, device, iou_threshold=0.1):
    """Predict a single image with a deepforest model.

    Args:
        model: callable model; it is invoked on the preprocessed image and its
            first output must expose a "boxes" entry
        image: a numpy array of a RGB image ranged from 0-255
        return_plot: Return image with plotted detections
        device: pytorch device of 'cuda' or 'cpu' for gpu prediction. Set internally.
        iou_threshold: IoU threshold forwarded to across_class_nms to suppress
            overlapping predictions

    Returns:
        None: when the model produces no boxes
        boxes: A pandas dataframe of predictions (Default)
        img: The input with predictions overlaid (when return_plot is true)
    """
    image = preprocess.preprocess_image(image)
    image = image.to(device)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        prediction = model(image)

    # return None for no predictions
    if len(prediction[0]["boxes"]) == 0:
        return None

    # This function only takes in a single image.
    df = visualize.format_boxes(prediction[0])
    df = across_class_nms(df, iou_threshold=iou_threshold)

    if not return_plot:
        return df

    # Plotting happens on the host, so bring the tensor back from the device
    # (a no-op on cpu), then drop the batch dim and move channels last.
    image = image.cpu().squeeze(0).permute(1, 2, 0)
    plot, ax = visualize.plot_predictions(image, df)
    return plot
def predict_file(model, csv_file, root_dir, savedir, device, iou_threshold=0.1):
    """Create a dataset and predict entire annotation file

    Csv file format is .csv file with the columns "image_path", "xmin","ymin","xmax","ymax"
    for the image name and bounding box position.
    Image_path is the relative filename, not absolute path, which is in the
    root_dir directory. One bounding box per line.

    Args:
        model: callable model invoked on each preprocessed image
        csv_file: path to csv file
        root_dir: directory of images; each "image_path" value is joined to it
        savedir: Optional. Directory to save image plots. Nothing is plotted
            when this is falsy.
        device: pytorch device of 'cuda' or 'cpu' for gpu prediction. Set internally.
        iou_threshold: IoU threshold forwarded to across_class_nms

    Returns:
        df: pandas dataframe with bounding boxes, label and scores for each image in the csv file

    Raises:
        ValueError: from pandas.concat when the csv file lists no images
    """
    input_csv = pd.read_csv(csv_file)

    # Just predict each image once.
    images = input_csv["image_path"].unique()

    prediction_list = []
    for path in images:
        image = io.imread(os.path.join(root_dir, path))
        image = preprocess.preprocess_image(image)

        # Just predict the images, even though we have the annotations.
        # .to() accepts both torch.device objects and plain strings and is a
        # no-op when the tensor already lives on the requested device.
        image = image.to(device)
        with torch.no_grad():
            prediction = model(image)

        prediction = visualize.format_boxes(prediction[0])
        prediction = across_class_nms(prediction, iou_threshold=iou_threshold)
        prediction["image_path"] = path
        prediction_list.append(prediction)

        if savedir:
            # if on GPU, bring back to cpu for plotting (no-op on cpu)
            image = image.cpu().squeeze(0).permute(1, 2, 0)
            plot, ax = visualize.plot_predictions(image, prediction)
            annotations = input_csv[input_csv.image_path == path]
            plot = visualize.add_annotations(plot, ax, annotations)
            plot_name = "{}.png".format(os.path.splitext(path)[0])
            plot.savefig(os.path.join(savedir, plot_name), dpi=300)

    df = pd.concat(prediction_list, ignore_index=True)
    return df
def predict_tile(model,
                 device,
                 raster_path=None,
                 image=None,
                 patch_size=400,
                 patch_overlap=0.05,
                 iou_threshold=0.15,
                 return_plot=False,
                 use_soft_nms=False,
                 sigma=0.5,
                 thresh=0.001):
    """For images too large to input into the model, predict_tile cuts the
    image into overlapping windows, predicts trees on each window and
    reassembles into a single set of predictions.

    Args:
        model: pytorch model
        device: pytorch device of 'cuda' or 'cpu' for gpu prediction. Set internally.
        raster_path: Path to image on disk; only read when image is None
        image (array): Numpy image array.
            NOTE(review): the previous docstring said BGR channel order, while
            predict_image documents RGB input - confirm with callers.
        patch_size: window size passed to preprocess.compute_windows, default 400
        patch_overlap: window overlap passed to preprocess.compute_windows,
            default 0.05. A value of 0 skips the non-max suppression step.
        iou_threshold: Minimum iou overlap among predictions between
            windows to be suppressed. Defaults to 0.15.
            Lower values suppress more boxes at edges.
        return_plot: Should the image be returned with the predictions drawn?
        use_soft_nms: whether to perform Gaussian Soft NMS or not, if false, default perform NMS.
        sigma: variance of Gaussian function used in Gaussian Soft NMS
        thresh: the score thresh used to filter bboxes after soft-nms performed

    Returns:
        None when no window produced a prediction.
        If return_plot, the first value returned by visualize.plot_predictions.
        Otherwise a pandas dataframe with the columns
        "xmin", "ymin", "xmax", "ymax", "label", "score". Boxes kept by
        soft-NMS carry their original (undecayed) scores.

    Raises:
        ValueError: if neither image nor raster_path is given
    """
    if image is None:
        if raster_path is None:
            raise ValueError("Either image or raster_path must be provided")
        # load raster as image
        image = io.imread(raster_path)

    # Compute sliding window index
    windows = preprocess.compute_windows(image, patch_size, patch_overlap)

    predicted_boxes = []
    for window in tqdm(windows):
        # crop window and predict
        crop = image[window.indices()]
        boxes = predict_image(model=model, image=crop, return_plot=False, device=device)
        if boxes is None:
            continue

        # transform the coordinates to original system; only the first two
        # values of getRect() are needed, as the window's x/y offset
        x_offset, y_offset = window.getRect()[:2]
        boxes["xmin"] = boxes["xmin"] + x_offset
        boxes["xmax"] = boxes["xmax"] + x_offset
        boxes["ymin"] = boxes["ymin"] + y_offset
        boxes["ymax"] = boxes["ymax"] + y_offset
        predicted_boxes.append(boxes)

    if len(predicted_boxes) == 0:
        print("No predictions made, returning None")
        return None

    predicted_boxes = pd.concat(predicted_boxes)

    # Non-max supression for overlapping boxes among window
    if patch_overlap == 0:
        mosaic_df = predicted_boxes
    else:
        print(
            f"{predicted_boxes.shape[0]} predictions in overlapping windows, applying non-max supression"
        )
        # move prediciton to tensor
        boxes = torch.tensor(predicted_boxes[["xmin", "ymin", "xmax", "ymax"]].values,
                             dtype=torch.float32)
        scores = torch.tensor(predicted_boxes.score.values, dtype=torch.float32)
        labels = predicted_boxes.label.values

        if use_soft_nms:
            # soft-NMS reorders and rescales the score tensor it is given, so
            # hand it a copy: `scores` must stay aligned with the returned
            # indices for the lookup below.
            bbox_left_idx = soft_nms(boxes=boxes,
                                     scores=scores.clone(),
                                     sigma=sigma,
                                     thresh=thresh)
        else:
            # Performs non-maximum suppression (NMS) on the boxes according to
            # their intersection-over-union (IoU).
            bbox_left_idx = nms(boxes=boxes, scores=scores, iou_threshold=iou_threshold)

        # int64 indices are valid for both tensor and numpy indexing
        bbox_left_idx = bbox_left_idx.long().numpy()
        new_boxes = boxes[bbox_left_idx].type(torch.int)
        new_labels = labels[bbox_left_idx]
        new_scores = scores[bbox_left_idx]

        # Recreate box dataframe
        image_detections = np.concatenate([
            new_boxes,
            np.expand_dims(new_labels, axis=1),
            np.expand_dims(new_scores, axis=1)
        ],
                                          axis=1)
        mosaic_df = pd.DataFrame(
            image_detections, columns=["xmin", "ymin", "xmax", "ymax", "label", "score"])
        print(f"{mosaic_df.shape[0]} predictions kept after non-max suppression")

    if return_plot:
        # Draw predictions; return only the plot for consistency with predict_image
        plot, _ = visualize.plot_predictions(image, mosaic_df)
        return plot

    return mosaic_df
def soft_nms(boxes, scores, sigma=0.5, thresh=0.001):
    """Gaussian soft-NMS: decay the confidence of each proposal in proportion
    to its IoU with higher-scoring proposals.

    Paper: Improving Object Detection With One Line of Code
    Code : https://github.com/DocF/Soft-NMS/blob/master/softnms_pytorch.py

    The input tensors are left untouched; all reordering and score decay is
    done on private copies, so callers can keep indexing their own tensors
    with the returned indices.

    Args:
        boxes: predictions bounding boxes tensor, one row per box in the
            format [x1, y1, x2, y2]
        scores: tensor with the score corresponding to each box
        sigma: variance of Gaussian function
        thresh: score thresh; boxes whose decayed score is not above it are dropped

    Return:
        idxs_keep: int32 tensor with the original row index of every selected
            box, ordered by the selection order (highest remaining score first)
    """
    num_boxes = boxes.shape[0]

    # Private working copies: the loop below swaps rows and rescales scores.
    boxes = boxes.clone() if boxes.is_floating_point() else boxes.float()
    scores = scores.clone()

    # Original row index of whatever box currently sits at each position.
    order = torch.arange(num_boxes, device=boxes.device)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)

    # The last box has nothing after it to suppress, so stop one short.
    for i in range(num_boxes - 1):
        rest = i + 1

        # Selection step: move the best remaining box into position i.
        best_score, best_offset = torch.max(scores[rest:], dim=0)
        if scores[i] < best_score:
            j = rest + best_offset.item()
            boxes[[i, j]] = boxes[[j, i]]
            scores[[i, j]] = scores[[j, i]]
            areas[[i, j]] = areas[[j, i]]
            order[[i, j]] = order[[j, i]]

        # IoU between box i and every box after it
        xx1 = boxes[rest:, 0].clamp(min=boxes[i, 0].item())
        yy1 = boxes[rest:, 1].clamp(min=boxes[i, 1].item())
        xx2 = boxes[rest:, 2].clamp(max=boxes[i, 2].item())
        yy2 = boxes[rest:, 3].clamp(max=boxes[i, 3].item())
        inter = (xx2 - xx1 + 1).clamp(min=0.0) * (yy2 - yy1 + 1).clamp(min=0.0)
        ovr = torch.div(inter, (areas[i] + areas[rest:] - inter))

        # Gaussian decay
        weight = torch.exp(-(ovr * ovr) / sigma)
        scores[rest:] = weight * scores[rest:]

    # select the boxes and keep the corresponding indexes
    idxs_keep = order[scores > thresh].int()
    return idxs_keep
def across_class_nms(predicted_boxes, iou_threshold=0.15):
    """Run non-max suppression over a dataframe of results.

    Args:
        predicted_boxes: dataframe of results (see visualize.format_boxes);
            the columns "xmin", "ymin", "xmax", "ymax", "score" and "label" are read
        iou_threshold: IoU threshold handed to nms; boxes overlapping a
            higher-scoring box by more than this are removed

    Returns:
        new_df: dataframe of the surviving boxes with the columns
            "xmin", "ymin", "xmax", "ymax", "label", "score"
    """
    coordinate_columns = ["xmin", "ymin", "xmax", "ymax"]

    # Tensor views of the dataframe for torchvision's nms
    box_tensor = torch.tensor(predicted_boxes[coordinate_columns].values,
                              dtype=torch.float32)
    score_tensor = torch.tensor(predicted_boxes.score.values, dtype=torch.float32)
    label_array = predicted_boxes.label.values

    keep = nms(boxes=box_tensor, scores=score_tensor,
               iou_threshold=iou_threshold).numpy()

    # Survivors: integer box coordinates plus column-shaped labels and scores
    kept_boxes = box_tensor[keep].type(torch.int)
    kept_labels = np.expand_dims(label_array[keep], axis=1)
    kept_scores = np.expand_dims(score_tensor[keep], axis=1)

    # Reassemble everything into one table
    image_detections = np.concatenate([kept_boxes, kept_labels, kept_scores], axis=1)
    return pd.DataFrame(image_detections,
                        columns=coordinate_columns + ["label", "score"])