\

},
  keywords = {Class imbalance, F-measure, Pattern Classification, Performance Metrics, Video Face Recognition, Visualization Tools},
  issn     = {0031-3203},
  doi      = {10.1016/j.patcog.2019.107146},
  author   = {Soleymani, Roghayeh and Granger, Eric and Fumera, Giorgio}
}

@inproceedings{1422,
  title        = {F-Measure Curves for Visualizing Classifier Performance with Imbalanced Data},
  booktitle    = {8th {IAPR} {TC3} Workshop on Artificial Neural Networks in Pattern Recognition ({ANNPR} 2018)},
  year         = {2018},
  note         = {In press},
  publisher    = {Springer},
  organization = {Springer},
  address      = {Siena},
  abstract     = {Training classifiers using imbalanced data is a challenging problem in many real-world recognition applications due in part to the bias in performance that occur for: (1) classifiers that are often optimized and compared using unsuitable performance measurements for imbalance problems; (2) classifiers that are trained and tested on a fixed imbalance level of data, which may differ from operational scenarios; (3) cases where the preference of correct classification of classes is application dependent. Specialized performance evaluation metrics and tools are needed for problems that involve class imbalance, including scalar metrics that assume a given operating condition (skew level and relative preference of classes), and global evaluation curves or metrics that consider a range of operating conditions. We propose a global evaluation space for the scalar F-measure metric that is analogous to the cost curves for expected cost. In this space, a classifier is represented as a curve that shows its performance over all of its decision thresholds and a range of imbalance levels for the desired preference of true positive rate to precision. Experiments with synthetic data show the benefits of evaluating and comparing classifiers under different operating conditions in the proposed F-measure space over ROC, precision-recall, and cost spaces.},
  keywords     = {Class imbalance, F-measure, Performance visualization tools},
  author       = {Soleymani, Roghayeh and Granger, Eric and Fumera, Giorgio}
}

@article{1439,
  title         = {Multi-label classification with a reject option},
  journal       = {Pattern Recognition},
  volume        = {46},
  year          = {2013},
  month         = aug,
  pages         = {2256},
  internal-note = {Source record gave start page 2256 in a misused "chapter" field; end page missing -- TODO verify full page range},
  abstract      = {Many multi-label classifiers provide a real-valued score for each class. A well known design approach consists of tuning the corresponding decision thresholds by optimising the performance measure of interest. We address two open issues related to the optimisation of the widely used F measure and precision-recall (P-R) curve, with respect to class-related decision thresholds, on a given data set. (i) We derive properties of the micro-averaged F, which allow its global maximum to be found by an optimisation strategy with a low computational cost. So far, only a suboptimal threshold selection rule and a greedy algorithm with no optimality guarantee were known. (ii) We rigorously define the macro and micro P-R curves, analyse a previously suggested strategy for computing them, based on maximising F, and develop two possible implementations, which can be also exploited for optimising related performance measures. We evaluate our algorithms on five data sets related to three different application domains.
}, keywords = {doc00, doc01, F-measure, multi-label categorization, Precision-Recall, thresholding}, author = {Ignazio Pillai and Giorgio Fumera and Fabio Roli} } @conference { 1349, title = {F-Measure Optimisation in Multi-label Classifiers}, booktitle = {21st International Conference on Pattern Recognition}, year = {2012}, month = {11-15/11/2012}, address = {Tsukuba, Japan}, abstract = {When a multi-label classifier outputs a real-valued score for each class, a well known design strategy consists of tuning the corresponding decision thresholds by optimising the performance measure of interest on validation data.

In this paper we focus on the F-measure, which is widely used in multi-label problems.

We derive two properties of the micro-averaged F measure, viewed as a function of the threshold values, which allow its global maximum to be found by an optimisation strategy with an upper bound on computational complexity of O(n^{2} N^{2}), where N and n are respectively the number of classes and of validation samples.

So far, only a suboptimal threshold selection rule and a greedy algorithm without any optimality guarantee were known for this task.

We then devise a possible optimisation algorithm based on our strategy, and evaluate it on three benchmark, multi-label data sets.

\