Spaces:
Runtime error
Runtime error
| from unittest import TestCase | |
| from multi_label_precision_recall_accuracy_fscore import MultiLabelPrecisionRecallAccuracyFscore | |
class MultiLabelPrecisionRecallAccuracyFscoreTest(TestCase):
    """
    Tests of MultiLabelPrecisionRecallAccuracyFscore created with its default configuration.

    All of these tests are also used for the multiset configuration (the multiset test case inherits from this
    class). Please keep that in mind and write every test so that it is valid for both configurations, i.e. do not
    use the same label multiple times within one sample.
    """

    def setUp(self):
        """Create a fresh metric for every test so attribute changes (e.g. beta) cannot leak between tests."""
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore()

    def _assert_scores(self, expected, actual):
        """
        Assert that the computed scores match the expected ones.

        The result must be a dict with exactly the expected keys. The values are compared with
        assertAlmostEqual instead of exact equality, because the expected values are floating point expressions
        whose last bit depends on the order of operations, which the metric is free to choose.

        :param expected: dict mapping score name to the expected value
        :param actual: value returned by the metric's compute method
        """
        self.assertIsInstance(actual, dict)
        self.assertSetEqual(set(expected), set(actual))
        for name, value in expected.items():
            self.assertAlmostEqual(value, actual[name], msg="score: " + name)

    def test_eok(self):
        """Predictions identical to the references give a perfect score."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                    [1, 2],
                    [0, 1, 2],
                ],
                references=[
                    [0, 1],
                    [1, 2],
                    [0, 1, 2],
                ],
            ),
        )

    def test_eok_string(self):
        """Same as test_eok, but the labels are strings."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    ["0", "1"],
                    ["1", "2"],
                    ["0", "1", "2"],
                ],
                references=[
                    ["0", "1"],
                    ["1", "2"],
                    ["0", "1", "2"],
                ],
            ),
        )

    def test_empty(self):
        """Empty predictions against empty references count as a perfect match."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [],
                    [],
                    [],
                ],
                references=[
                    [],
                    [],
                    [],
                ],
            ),
        )

    def test_empty_reference(self):
        """Non-empty predictions against empty references score zero everywhere."""
        self._assert_scores(
            {
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                    [1, 2],
                    [0, 1, 2],
                ],
                references=[
                    [],
                    [],
                    [],
                ],
            ),
        )

    def test_empty_prediction(self):
        """Empty predictions against non-empty references score zero everywhere."""
        self._assert_scores(
            {
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [],
                    [],
                    [],
                ],
                references=[
                    [0, 1],
                    [1, 2],
                    [0, 1, 2],
                ],
            ),
        )

    def test_completely_different(self):
        """Predictions sharing no label with the references score zero everywhere."""
        self._assert_scores(
            {
                "precision": 0.0,
                "recall": 0.0,
                "accuracy": 0.0,
                "fscore": 0.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                    [1, 2],
                    [0, 1, 2],
                ],
                references=[
                    [3, 4],
                    [5, 6],
                    [7, 8, 9],
                ],
            ),
        )

    def test_max_precision(self):
        """Every predicted label is correct, but only half of the reference labels are found."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                ],
                references=[
                    [0, 1, 2, 3],
                ],
            ),
        )

    def test_max_recall(self):
        """Every reference label is found, but only half of the predicted labels are correct."""
        self._assert_scores(
            {
                "precision": 0.5,
                "recall": 1.0,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1, 2, 3],
                ],
                references=[
                    [0, 1],
                ],
            ),
        )

    def test_partial_match(self):
        """One shared label out of two on each side (three distinct labels in total)."""
        self._assert_scores(
            {
                "precision": 0.5,
                "recall": 0.5,
                "accuracy": 1 / 3,
                "fscore": 0.5,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                ],
                references=[
                    [0, 2],
                ],
            ),
        )

    def test_partial_match_multi_sample(self):
        """Scores for several partially matching samples."""
        precision = 2.5 / 3
        recall = 2 / 3
        self._assert_scores(
            {
                "precision": precision,
                "recall": recall,
                "accuracy": 0.5,
                "fscore": 2 * precision * recall / (precision + recall),
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1],
                    [0, 1],
                    [2, 3],
                ],
                references=[
                    [0, 1, 2, 3],
                    [0, 1, 2, 3],
                    [2],
                ],
            ),
        )

    def test_beta(self):
        """Beta can be set as an attribute of the metric as well as passed to compute as an argument."""
        predictions = [
            [0, 1],
            [0, 1],
            [2, 3],
        ]
        references = [
            [0, 1, 2, 3],
            [0, 1, 2, 3],
            [2],
        ]
        precision = 2.5 / 3
        recall = 2 / 3

        # beta = 2 configured on the metric object: (1 + 2^2) = 5 and 2^2 = 4
        self.multi_label_precision_recall_accuracy_fscore.beta = 2
        self._assert_scores(
            {
                "precision": precision,
                "recall": recall,
                "accuracy": 0.5,
                "fscore": 5 * precision * recall / (4 * precision + recall),
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=predictions,
                references=references,
            ),
        )

        # beta = 3 passed to compute while the attribute is still 2: (1 + 3^2) = 10 and 3^2 = 9
        self._assert_scores(
            {
                "precision": precision,
                "recall": recall,
                "accuracy": 0.5,
                "fscore": 10 * precision * recall / (9 * precision + recall),
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=predictions,
                references=references,
                beta=3,
            ),
        )
class MultiLabelPrecisionRecallAccuracyFscoreTestMultiset(MultiLabelPrecisionRecallAccuracyFscoreTest):
    """
    Tests of MultiLabelPrecisionRecallAccuracyFscore created with config_name="multiset".

    All tests inherited from the parent test case are run against this configuration as well. The tests defined
    here additionally use samples in which the same label occurs multiple times.
    """

    def setUp(self):
        """Create a fresh multiset metric for every test so attribute changes cannot leak between tests."""
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore(
            config_name="multiset")

    def _assert_scores(self, expected, actual):
        """
        Assert that the computed scores match the expected ones.

        The result must be a dict with exactly the expected keys. The values are compared with
        assertAlmostEqual instead of exact equality, because the expected values are floating point expressions
        whose last bit depends on the order of operations, which the metric is free to choose.

        :param expected: dict mapping score name to the expected value
        :param actual: value returned by the metric's compute method
        """
        self.assertIsInstance(actual, dict)
        self.assertSetEqual(set(expected), set(actual))
        for name, value in expected.items():
            self.assertAlmostEqual(value, actual[name], msg="score: " + name)

    def test_multiset_eok(self):
        """Samples equal as multisets give a perfect score regardless of the order of the labels."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 1.0,
                "accuracy": 1.0,
                "fscore": 1.0,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1, 1],
                    [1, 2, 2],
                    [0, 1, 2, 1],
                ],
                references=[
                    [1, 0, 1],
                    [1, 2, 2],
                    [0, 1, 1, 2],
                ],
            ),
        )

    def test_multiset_partial_match(self):
        """All three predicted occurrences are correct, but the reference contains six occurrences."""
        self._assert_scores(
            {
                "precision": 1.0,
                "recall": 0.5,
                "accuracy": 0.5,
                "fscore": 2 / 3,
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1, 1],
                ],
                references=[
                    [1, 0, 1, 1, 0, 0],
                ],
            ),
        )

    def test_multiset_partial_match_multi_sample(self):
        """Scores for several partially matching samples with repeated labels."""
        p = (1 + 2 / 3) / 2
        r = (3 / 4 + 1) / 2
        self._assert_scores(
            {
                "precision": p,
                "recall": r,
                "accuracy": (3 / 4 + 2 / 3) / 2,
                "fscore": 2 * p * r / (p + r),
            },
            self.multi_label_precision_recall_accuracy_fscore.compute(
                predictions=[
                    [0, 1, 1],
                    [1, 2, 2],
                ],
                references=[
                    [1, 0, 1, 1],
                    [1, 2],
                ],
            ),
        )

    def test_zero_cardinality_precision(self):
        """The configured zero_cardinality_precision is reported for an empty prediction with a non-empty reference."""
        metric = self.multi_label_precision_recall_accuracy_fscore
        metric.zero_cardinality_precision = 0.5

        # empty prediction, non-empty reference -> the configured value
        self.assertAlmostEqual(
            0.5,
            metric.compute(
                predictions=[
                    [],
                ],
                references=[
                    [0, 1, 1],
                ],
            )["precision"],
        )

        # both empty -> still a perfect match
        self.assertAlmostEqual(
            1.0,
            metric.compute(
                predictions=[
                    [],
                ],
                references=[
                    [],
                ],
            )["precision"],
        )

        # non-empty prediction -> regular precision
        self.assertAlmostEqual(
            2 / 3,
            metric.compute(
                predictions=[
                    [1, 2, 3],
                ],
                references=[
                    [1, 2],
                ],
            )["precision"],
        )

    def test_zero_cardinality_recall(self):
        """The configured zero_cardinality_recall is reported for an empty reference with a non-empty prediction."""
        metric = self.multi_label_precision_recall_accuracy_fscore
        metric.zero_cardinality_recall = 0.5

        # empty reference, non-empty prediction -> the configured value
        self.assertAlmostEqual(
            0.5,
            metric.compute(
                predictions=[
                    [0, 1, 1],
                ],
                references=[
                    [],
                ],
            )["recall"],
        )

        # both empty -> still a perfect match
        self.assertAlmostEqual(
            1.0,
            metric.compute(
                predictions=[
                    [],
                ],
                references=[
                    [],
                ],
            )["recall"],
        )

        # non-empty reference -> regular recall
        self.assertAlmostEqual(
            2 / 3,
            metric.compute(
                predictions=[
                    [1, 2],
                ],
                references=[
                    [1, 2, 3],
                ],
            )["recall"],
        )