Source code for hyponic.utils.problem_identifier

import numpy as np
from enum import Enum


[docs]class ProblemType(Enum): """ This class is used to identify the problem type using dataset. It can be used for regression and classification only """ REGRESSION = 1 BINARY_CLASSIFICATION = 2 MULTICLASS_CLASSIFICATION = 3
[docs]class ProblemIdentifier: """ This class is used to identify the problem type using dataset. It can be used for regression and classification only """ def __init__(self, y): self.y = y
[docs] def get_problem_type(self): """ This method identifies the problem type using number of classes and data type of y. If number of classes is 2 and data type of y is int64, then it is a binary classification problem If number of classes is more than 2 and data type of y is int64, then it is a multiclass classification problem If data type of y is not int64, then it is a regression problem """ target_type = type(self.y[0]) number_of_classes = len(np.unique(self.y)) if target_type in [np.int64, np.int32, np.int16, np.int8]: if number_of_classes == 2: return ProblemType.BINARY_CLASSIFICATION else: return ProblemType.MULTICLASS_CLASSIFICATION else: return ProblemType.REGRESSION