In [1]:
import pandas as pd

uris = [
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/ionosphere/ionosphere.data',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data'
]

bc, io, lr, sp = [pd.read_csv(uri, header=None) for uri in uris]

In [2]:
X_bc, y_bc = bc.iloc[:,2:], bc.iloc[:,1]
X_io, y_io = io.iloc[:,:io.shape[1]-1], io.iloc[:,-1]
X_lr, y_lr = lr.iloc[:,1:], lr.iloc[:,0]
X_sp, y_sp = sp.iloc[:,:sp.shape[1]-1], sp.iloc[:,-1]

# Any preprocessing (normalization, missing values...) should be done here. 

datasets = { label : (X, y) for label, X, y in [('breast_cancer', X_bc, y_bc), ('ionosphere', X_io, y_io), ('letter_recognition', X_lr, y_lr), ('spambase', X_sp, y_sp)]}

In [3]:
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC

algorithms = {
    'perceptron': Perceptron,
    'logistic_regression': LogisticRegression,
    'adaboost': AdaBoostClassifier,
    'svm': SVC
}

In [4]:
from sklearn.cross_validation import train_test_split

scores_df = pd.DataFrame(columns=algorithms.keys())
n = 20

for algorithm_label, algorithm_class in algorithms.items():
    model = algorithm_class()
    algorithm_scores = {}
    for dataset_label, (X, y) in datasets.items():
        dataset_score = 0
        for i in range(n):
            X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7)
            model.fit(X_train, y_train)
            dataset_score += model.score(X_test, y_test)
        algorithm_scores[dataset_label] = dataset_score / n
    scores_df[algorithm_label] = pd.Series(algorithm_scores)

In [5]:
scores_df

                    perceptron       svm  adaboost  logistic_regression
breast_cancer         0.802924  0.639766  0.959064             0.949123
ionosphere            0.811321  0.919340  0.925472             0.878302
letter_recognition    0.467583  0.971308  0.262767             0.717533
spambase              0.641528  0.831680  0.938559             0.925199