PLA Perceptron Learning Algorithm #台大 Machine learning #

                 Perceptron  Learning Algorithm


这里肯定会预先给定一个关于垃圾邮件词汇的集合(keyword set),然后根据四组不同的输入样本里面垃圾词汇出现的频率来鉴别是否是垃圾邮件.系统输出+1判定为垃圾邮件,否则不是.这里答案是第二组.





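  如果感知机的实际输出 $\operatorname{sign}(w_t^T x_{n(t)})$ 和样本的期望输出 $y_{n(t)}$ 不一致的话,就用 $w_{t+1} \leftarrow w_t + y_{n(t)} x_{n(t)}$ 来修正权重.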









这个demo 来自link,之前的源码在我的配置环境下不能跑,我有稍作改动.

Programmer  :   EOF
file        :
date        :   2015.02.22

Code description:
    This program is coded for Perceptron Learning Algorithm.

import numpy as np
import matplotlib.pyplot as plt
import random
import os, subprocess

class Perceptron:
    """Perceptron Learning Algorithm (PLA) demo on random 2-D data.

    A random target line is drawn through two random points of the square
    [-1, 1] x [-1, 1]; N sample points are then generated and labelled with
    the sign of the target function, so the data set is linearly separable
    by construction.

    Attributes:
        V: target weight vector ``[bias, w1, w2]`` as a NumPy array.
        X: list of ``(x, s)`` pairs where ``x = [1, x1, x2]`` and ``s`` is
           the label assigned by the target function.
        w: weights learned by :meth:`pla` (only set after ``pla`` has run).
    """

    def __init__(self, N):
        """Create a random target line and N labelled sample points."""
        # Two random points A and B define the target line; V is built so
        # that V . [1, x, y] == 0 for every point (x, y) on the line AB.
        xA, yA, xB, yB = [random.uniform(-1, 1) for _ in range(4)]
        self.V = np.array([xB*yA - xA*yB, yB - yA, xA - xB])
        self.X = self.generate_points(N)

    @staticmethod
    def _predict(vec, x):
        """Return sign(vec . x) as an int (-1, 0 or +1)."""
        return int(np.sign(np.dot(vec, x)))

    def _misclassified(self, vec, pts):
        """Return the (x, s) pairs of pts whose label differs from vec's."""
        return [(x, s) for x, s in pts if self._predict(vec, x) != s]

    def generate_points(self, N):
        """Return N random points labelled by the target vector ``self.V``.

        Each element is a tuple ``(x, s)`` with ``x = [1, x1, x2]`` (the
        leading 1 is the bias coordinate) and ``s = sign(V . x)``.
        """
        X = []
        for _ in range(N):
            x1, x2 = [random.uniform(-1, 1) for _ in range(2)]
            x = np.array([1, x1, x2])
            s = self._predict(self.V, x)
            X.append((x, s))

        return X

    def plot(self, mispts = None, vec = None, save = False):
        """Draw the target line, the sample points and optional extras.

        Args:
            mispts: optional list of ``(x, s)`` pairs drawn as small dots.
            vec: optional weight vector; its decision boundary is drawn as
                a thick green line.
            save: when true, add a title and save the figure to a file
                named ``p_N<number of points>``.
        """
        plt.figure(figsize=(5, 5))
        plt.xlim(-1, 1)
        plt.ylim(-1, 1)
        V = self.V
        # Rewrite V0 + V1*x + V2*y = 0 as y = a*x + b.
        a, b = -V[1]/V[2], -V[0]/V[2]
        l = np.linspace(-1, 1)
        plt.plot(l, a*l + b, 'k-')
        cols = {1: 'r', -1: 'b'}

        for x, s in self.X:
            plt.plot(x[1], x[2], cols[s] + 'o')

        if mispts:
            for x, s in mispts:
                plt.plot(x[1], x[2], cols[s] + '.')

        # "vec != None" would compare element-wise on a NumPy array and make
        # the "if" ambiguous, so test identity instead.
        if vec is not None:
            aa, bb = -vec[1]/vec[2], -vec[0]/vec[2]
            plt.plot(l, aa*l + bb, 'g-', lw = 2)

        if save:
            if not mispts:
                plt.title('N = %s' % (str(len(self.X))))
            else:
                plt.title('N = %s with %s test points'
                          % (str(len(self.X)), str(len(mispts))))

            plt.savefig('p_N%s' % (str(len(self.X))),
                        dpi = 200, bbox_inches = 'tight')

    def classification_error(self, vec, pts = None):
        """Return the fraction of points misclassified by ``vec``.

        Args:
            vec: weight vector ``[bias, w1, w2]`` to evaluate.
            pts: list of ``(x, s)`` pairs; when omitted or empty the
                training set ``self.X`` is used.

        Returns:
            A float in [0, 1]; 0.0 when there are no points to evaluate.
        """
        if not pts:
            pts = self.X

        M = len(pts)
        if M == 0:
            # Nothing to classify: avoid a division by zero.
            return 0.0

        return len(self._misclassified(vec, pts)) / float(M)

    def choose_miscl_point(self, vec):
        """Return a random ``(x, s)`` pair of ``self.X`` misclassified by vec.

        Raises:
            ValueError: if ``vec`` classifies every point correctly.
        """
        mispts = self._misclassified(vec, self.X)
        if not mispts:
            raise ValueError('no misclassified point to choose from')

        return random.choice(mispts)

    def pla(self, save = False):
        """Run the Perceptron Learning Algorithm and store the result.

        Starting from zero weights, repeatedly pick a random misclassified
        point ``(x, s)`` and apply the update ``w += s*x`` until every point
        of ``self.X`` is classified correctly. The final weights are stored
        in ``self.w``.

        Args:
            save: when true, save one figure per iteration to a file named
                ``p_N<number of points>_it<iteration>``.
        """
        # Initialize the weights to zeros
        w = np.zeros(3)
        N = len(self.X)
        it = 0
        # Iterate until all points are correctly classified; the
        # misclassified set is computed once per iteration and reused for
        # both the stopping test and the random pick.
        mispts = self._misclassified(w, self.X)
        while mispts:
            it += 1
            # pick random misclassified point
            x, s = random.choice(mispts)
            # update weights
            w += s*x
            if save:
                self.plot(vec = w)
                plt.title('N = %s, Iteration %s\n'
                          % (str(N), str(it)))
                plt.savefig('p_N%s_it%s' % (str(N), str(it)),
                            dpi = 200, bbox_inches = 'tight')
                # Release the figure so long runs do not accumulate them.
                plt.close()
            mispts = self._misclassified(w, self.X)

        self.w = w

    def check_error(self, M, vec):
        """Return the error of ``vec`` on M freshly generated points."""
        check_pts = self.generate_points(M)
        return self.classification_error(vec, pts = check_pts)

#--------for testing-------------------------
# Build a perceptron demo over 20 randomly generated sample points.
p = Perceptron(20)
# Disabled demo call. NOTE(review): it reads p.w, which is only assigned
# inside pla(), so p.pla() would have to run first -- confirm before enabling.
#p.plot(p.generate_points(20),p.w, save=True)
