# Install: pip install scikit-learn
# (the PyPI package is "scikit-learn"; "sklearn" is only the import name)
#
# References:
#   https://en.wikipedia.org/wiki/Decision_tree
#   https://en.wikipedia.org/wiki/Decision_tree_learning
#   https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
#     (notable parameters: max_depth, min_samples_leaf)
#
# Classification and Regression Trees
#   n observations = rows
#   m = features or columns
#
#      FEVER  COUGHING  HEADACHE  FLU
#   0    1       0         0       1
#   1    0       1         1       1
#   2    0       1         1       0
#   3    0       1         0       0
#   4    1       1         0       1
#
# As with other classifiers, the tree-based classifiers take two arrays as input:
#   an array X, sparse or dense, of shape (n_samples, n_features) holding the
#   training samples, and an array Y of integer values, shape (n_samples,),
#   holding the class labels for the training samples.

import pickle
from io import StringIO

from joblib import dump, load
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

# NOTE: pickle, StringIO, dump, load and tree are not used by the code below;
# they are kept in case later parts of the file rely on them.

# Training samples, one row per observation.
# Columns: FEVER, COUGHING, HEADACHE, FLU
# NOTE(review): the last row has FEVER = 0 here but FEVER = 1 in the table
# above -- confirm which one is intended.
X = [
    [1, 0, 0, 1],
    [0, 1, 1, 1],
    [0, 1, 1, 0],
    [0, 1, 0, 0],
    [0, 1, 0, 1],
]

# COCO virus results yes or no, 1 = yes (one label per row of X).
Y = [1, 0, 0, 0, 1]

# A fixed random_state makes the bootstrap sampling and feature selection
# repeatable, so the probabilities printed below are the same on every run.
clf = RandomForestClassifier(n_estimators=10, random_state=0)
clf = clf.fit(X, Y)

# predict_proba expects a 2-D array of shape (n_samples, n_features).
# Reshape your data using array.reshape(-1, 1) if it has a single feature, or
# array.reshape(1, -1) if it contains a single sample -- hence the two square
# brackets [[ ]] around each sample below.
#
# As an alternative to outputting a specific class, the probability of each
# class can be predicted. For a forest this is the mean of the per-tree class
# probabilities; for a single tree it is the fraction of training samples of
# that class in the leaf the sample falls into.
print(clf.predict_proba([[0, 1, 1, 0]]))
print(clf.predict_proba([[0, 0, 0, 1]]))