Find S

import numpy as np
import pandas as pd

data = pd.read_csv("dataset1.csv")
# pd.read_csv already consumes the header row, so only the last
# (class) column needs to be split off.
sample = np.array(data)[:, :-1]  # attribute columns
target = np.array(data)[:, -1]   # class column

def train(sample, target):
    # Initialise the specific hypothesis from the first positive example
    specific_hypothesis = None
    for i in range(len(target)):
        if target[i] == "Yes":
            specific_hypothesis = list(sample[i])
            break

    # Generalise the hypothesis over the remaining positive examples
    for i in range(len(sample)):
        if target[i] == "Yes":
            for j in range(len(specific_hypothesis)):
                if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                    specific_hypothesis[j] = '?'
    return specific_hypothesis

specific_hypothesis = train(sample, target)
print("Specific Hypothesis:", specific_hypothesis)

Candidate Elimination

import numpy as np
import pandas as pd

data = pd.read_csv("dataset1.csv")
sample = np.array(data)[:, :-1]  # attribute columns
target = np.array(data)[:, -1]   # class column

def train(sample, target):
    specific_hypothesis = list(sample[0])  # assumes the first example is positive
    num_atb = len(specific_hypothesis)
    generic_hypothesis = [['?' for _ in range(num_atb)] for _ in range(num_atb)]

    for i in range(len(sample)):
        if target[i] == "Yes":
            # Positive example: generalise S and relax the matching entries of G
            if specific_hypothesis != list(sample[i]):
                for j in range(num_atb):
                    if sample[i][j] != specific_hypothesis[j] and specific_hypothesis[j] != '?':
                        specific_hypothesis[j] = '?'
                        generic_hypothesis[j][j] = '?'
        else:
            # Negative example: specialise G against the current S
            for j in range(num_atb):
                if sample[i][j] != specific_hypothesis[j]:
                    generic_hypothesis[j][j] = specific_hypothesis[j]
                else:
                    generic_hypothesis[j][j] = '?'

    # Drop hypotheses in G that stayed maximally general
    tmp = ['?' for _ in range(num_atb)]
    generic_hypothesis = [g for g in generic_hypothesis if g != tmp]
    return specific_hypothesis, generic_hypothesis

specific_hypothesis, generic_hypothesis = train(sample, target)
print(f"Specific Hypothesis: {specific_hypothesis}\nGeneric Hypothesis: {generic_hypothesis}")

Linear Regression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("dataset.csv")
x = list(np.array(data)[:, 0])
y = list(np.array(data)[:, -1])

mean_x = sum(x) / len(x)
mean_y = sum(y) / len(y)
deviation_x = [i - mean_x for i in x]
deviation_y = [i - mean_y for i in y]
product_deviation = [i * j for i, j in zip(deviation_x, deviation_y)]
square_deviation_x = [i ** 2 for i in deviation_x]

# Least-squares estimates: slope = cov(x, y) / var(x)
slope = sum(product_deviation) / sum(square_deviation_x)
intercept = mean_y - (slope * mean_x)

X = [i for i in range(-1, 20)]
Y = [i * slope + intercept for i in X]
plt.scatter(x, y)
plt.plot(X, Y)
plt.show()

EM

from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = load_iris()
X = pd.DataFrame(dataset.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(dataset.target)
y.columns = ['Targets']

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3)
model.fit(X)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
plt.title('KMeans')

# GMM PLOT
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')
plt.show()
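Although sklearn.metrics is imported as sm above, the script never uses it. A minimal sketch of how the two clusterings could be scored against the true iris labels, using the adjusted Rand index (chosen because cluster numbering is arbitrary, so a permutation-invariant score is needed, unlike plain accuracy):

# Optional extension of the EM script above
# (assumes model, y_cluster_gmm, and y from that script are in scope)
print("KMeans ARI:", sm.adjusted_rand_score(y.Targets, model.labels_))
print("GMM ARI:", sm.adjusted_rand_score(y.Targets, y_cluster_gmm))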
K-Nearest Neighbor (KNN)

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as kn
from sklearn import datasets

iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

for i in range(len(iris.target_names)):
    print("Label", i, "-", str(iris.target_names[i]))

k = 3
classifier = kn(n_neighbors=k)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
print("Accuracy for k:", k, "is", classifier.score(x_test, y_test))

Naive Bayesian

# import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# load data from CSV
data = pd.read_csv('tennisdata.csv')
print("The first 5 values of data is:\n", data.head())

# obtain train data and train output
X = data.iloc[:, :-1]
print("\nThe first 5 values of train data is\n", X.head())
y = data.iloc[:, -1]
print("\nThe first 5 values of train output is\n", y.head())

# convert the categorical attributes to numbers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
print("\nNow the train data is:\n", X.head())

le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the train output is\n", y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
classifier = GaussianNB()
classifier.fit(X_train, y_train)
print("Accuracy is:", accuracy_score(y_test, classifier.predict(X_test)))

Naive Bayesian Using API

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score

msg = pd.read_csv('data.csv', names=['message', 'label'])
print("Total Instances of Dataset:", msg.shape[0])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})

X = msg.message
y = msg.labelnum
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)

count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)  # document-term matrix for training docs
Xtest_dm = count_v.transform(Xtest)        # reuse the fitted vocabulary on test docs

# get_feature_names_out() in scikit-learn >= 1.0 (get_feature_names() in older versions)
df = pd.DataFrame(Xtrain_dm.toarray(), columns=count_v.get_feature_names_out())
print(df[0:5])

clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)

# Predictions line up with the test documents, not the training ones
for doc, p in zip(Xtest, pred):
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))

print('Accuracy Metrics:\n')
print('Accuracy:', accuracy_score(ytest, pred))
print('Recall:', recall_score(ytest, pred))
print('Precision:', precision_score(ytest, pred))
print('Confusion Matrix:\n', confusion_matrix(ytest, pred))
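The script above assumes data.csv has no header row and two comma-separated columns, the message text followed by a 'pos'/'neg' label. Hypothetical rows for illustration (not the actual dataset):

    I love this sandwich,pos
    This is an amazing place,pos
    I am tired of this stuff,neg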
K-Means

from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = load_iris()
X = pd.DataFrame(dataset.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(dataset.target)
y.columns = ['Targets']

plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])

# REAL PLOT
plt.subplot(1, 3, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real')

# K-PLOT
plt.subplot(1, 3, 2)
model = KMeans(n_clusters=3)
model.fit(X)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[predY], s=40)
plt.title('KMeans')

# GMM PLOT
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
plt.subplot(1, 3, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')
plt.show()

Locally Weighted Regression

import math
from math import ceil
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    # Bandwidth per point: distance to its r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    # Tricube weights
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            # Weighted least-squares normal equations for a local line
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        # Robustifying pass: down-weight points with large residuals
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest

n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)

plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()

ID3 (Decision Tree)

import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
dataset = pd.read_csv('dataset3.csv')
# Encode every categorical column as integers, one column at a time
dataset = dataset.apply(le.fit_transform)
x = dataset.drop(['Play'], axis=1)
y = dataset['Play']

clf = tree.DecisionTreeClassifier(criterion='entropy')  # entropy splits, as in ID3
clf = clf.fit(x, y)
tree.plot_tree(clf)
plt.show()
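To read the induced tree without the plot, scikit-learn's tree.export_text can print the split rules as indented text. A minimal sketch (note the thresholds refer to the label-encoded integers, not the original category names):

# Text dump of the fitted tree from the ID3 script above
print(tree.export_text(clf, feature_names=list(x.columns)))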