Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added F1 score; reduced default metric list #9

Merged
merged 3 commits into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 96 additions & 1 deletion isles/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,4 +335,99 @@ def accuracy(truth, prediction, batchwise=False):
prediction[sample_idx,...].reshape((num_pred,)),
batchwise=False)
accuracy_list.append(sample_accuracy)
return tuple(accuracy_list)
return tuple(accuracy_list)


def _lesion_f1_score(truth, prediction, empty_value=1.0):
    """
    Computes the lesion-wise F1-score between two masks. A ground-truth lesion counts as a true positive if at
    least one of its voxels overlaps with the prediction.

    Parameters
    ----------
    truth : array-like, bool
        3D array. If not boolean, will be converted (any non-zero voxel is foreground).
    prediction : array-like, bool
        3D array with a shape matching 'truth'. If not boolean, will be converted (any non-zero voxel is
        foreground).
    empty_value : scalar, float
        Optional. Value to which to default if there are no labels. Default: 1.0.

    Returns
    -------
    f1_score : float
        Lesion-wise F1-score as float.
        Max score = 1
        Min score = 0
        If both images are empty (tp + fp + fn = 0) = empty_value

    Notes
    -----
    Lesions are the connected components found by `scipy.ndimage.label` with its default structuring element.

    tp: connected component from the ground-truth image that overlaps on at least one voxel with the prediction.
    fp: connected component from the prediction image that has no voxel overlapping with the ground-truth image.
    fn: connected component from the ground-truth image that has no voxel overlapping with the prediction image.

    The score is tp / (tp + (fp + fn) / 2).
    """
    # Work on boolean masks throughout. Summing masks and testing for "> 1" is unreliable: NumPy adds boolean
    # arrays as a logical OR (so the sum never exceeds 1), and non-binary inputs can exceed 1 without any overlap.
    truth_mask = np.asarray(truth).astype(bool)
    prediction_mask = np.asarray(prediction).astype(bool)

    labeled_truth, num_true_lesions = scipy.ndimage.label(truth_mask)
    labeled_prediction, num_pred_lesions = scipy.ndimage.label(prediction_mask)

    # Ground-truth lesion labels that appear under the predicted foreground are the detected lesions. Label 0 is
    # background, so only the non-zero labels are counted.
    tp = np.count_nonzero(np.unique(labeled_truth[prediction_mask]))
    fn = num_true_lesions - tp

    # Predicted lesions whose label never appears under the ground-truth foreground are false positives.
    matched_predictions = np.count_nonzero(np.unique(labeled_prediction[truth_mask]))
    fp = num_pred_lesions - matched_predictions

    denom = tp + (fp + fn) / 2
    if denom == 0:
        # No lesion in either mask: the score is undefined, so fall back to the caller-supplied default.
        return empty_value
    return tp / denom


def lesion_f1_score(truth, prediction, batchwise=False):
    """ Lesion-wise F1 score, averaged over channels. A lesion counts as detected when at least one voxel is shared
    between a region in `truth` and `prediction`.

    Parameters
    ----------
    truth : array-like, bool
        Ground truth for one sample, indexed as (channel, x, y, z). With batchwise=True, a leading batch
        dimension is expected: (batch, channel, x, y, z).
    prediction : array-like, bool
        Predictions, laid out exactly like `truth`.
    batchwise : bool
        Optional. When True, the first dimension is treated as the batch and every sample is scored separately.
        Default: False.

    Returns
    -------
    float or list
        Mean of the per-channel lesion-wise F1-scores. If batchwise=True, a list holding that mean for every
        sample in the batch.
    """
    if batchwise:
        # Score each sample on its own by recursing into the single-sample path.
        return [lesion_f1_score(truth[sample, ...], prediction[sample, ...], batchwise=False)
                for sample in range(truth.shape[0])]

    n_channels = truth.shape[0]
    # Seed the running total with the first channel, then add the remaining ones.
    total = _lesion_f1_score(truth[0, ...], prediction[0, ...])
    for channel in range(1, n_channels):
        total += _lesion_f1_score(truth[channel, ...], prediction[channel, ...])
    return total / n_channels
8 changes: 2 additions & 6 deletions settings.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from isles.scoring import dice_coef, volume_difference, simple_lesion_count_difference, precision, sensitivity, \
specificity, accuracy, lesion_count_by_weighted_assignment
from isles.scoring import dice_coef, volume_difference, simple_lesion_count_difference, lesion_f1_score

eval_settings = {
"GroundTruthRoot": "/opt/evaluation/ground-truth/", # Path to the ground truth
Expand All @@ -22,8 +21,5 @@
"ScoringFunctions": {'Dice': dice_coef, # Functions to use for scoring the dataset.
'Volume Difference': volume_difference,
'Simple Lesion Count': simple_lesion_count_difference,
'Precision': precision,
'Sensitivity': sensitivity,
'Specificity': specificity,
'Accuracy': accuracy}
'Lesionwise F1-Score': lesion_f1_score}
}