import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties
import matplotlib as mpl
from matplotlib import animation
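# Chinese-capable font (Microsoft YaHei) for figure labels; defined here but not
# passed to any plotting call below.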
font = FontProperties(fname=r"c:\windows\fonts\msyh.ttc", size=10)
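# Numerically stable element-wise sigmoid, computed in place:
# for z >= 0 use 1 / (1 + e^(-z)), for z < 0 use e^z / (1 + e^z),
# so that exp never overflows for large |z|.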
def sigmoid(z):
    for (x, y), val in np.ndenumerate(z):
        if val >= 0:
            z[x, y] = 1 / (1 + np.exp(-val))
        else:
            z[x, y] = np.exp(val) / (1 + np.exp(val))
    return z
# Prediction (hypothesis) function: h(x) = sigmoid(theta · x)
def h(x, theta):
    return sigmoid(np.dot(theta, x))
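# Gradient of the log-likelihood with respect to theta: (y - h(x)) · x^T.
# SGD below adds alpha * gradient, i.e. gradient ascent on the log-likelihood
# (equivalently, descent on the cross-entropy loss).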
def gradient(x, y, theta):
    return np.dot(y - h(x, theta), x.T)
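# Cross-entropy loss; the 1e-5 terms keep the logs away from log(0).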
def loss(x, y, theta):
    return -np.sum(y * np.log(h(x, theta) + 1e-5) + (1 - y) * np.log(1 - h(x, theta) + 1e-5))
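# Learning-rate decay: divide alpha by 4 each time this is called, until alpha
# falls below 1e-6.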
def reduce(alpha):
    if alpha < 1e-6:
        return alpha
    return alpha / 4
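# Stochastic gradient ascent: each iteration updates theta with one randomly
# chosen training sample; the loss is recorded every 200 iterations, train/test
# accuracy is recorded and alpha is decayed every 1000 iterations.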
def SGD(x, y, x1, y1, theta, alpha=0.5, maxNum=10000):
    Loss = []
    Acc = []
    Acc1 = []
    xx = x.T
    for i in range(maxNum):
        if i % 200 == 0:
            Loss.append(loss(x, y, np.array(theta)))
        if i % 1000 == 0:
            Acc.append(accuracy(x, y, theta.flatten()))
            Acc1.append(accuracy(x1, y1, theta.flatten()))
        cur = np.random.randint(0, y.size)
        g = gradient(np.array([xx[cur]]).T, y[cur], theta)
        theta = theta + alpha * g
        if i % 1000 == 0:
            alpha = reduce(alpha)
    return theta.flatten(), Loss, Acc, Acc1
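# Fraction of samples whose predicted label matches y. x is laid out as in
# getData(): row 0 is the bias term, rows 1 and 2 are the two features.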
def accuracy(x, y, theta):
    p = predict(np.array([x[1]]), np.array([x[2]]), theta)
    num = 0
    for (i, j), val in np.ndenumerate(p):
        if val == y[j]:
            num += 1
    return num / len(y)
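# Load the Exam train/test sets, transpose so that each column is one sample,
# and prepend a row of ones as the bias feature.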
def getData():
    x = np.loadtxt(r'..\data\Exam\train\x.txt').T
    y = np.loadtxt(r'..\data\Exam\train\y.txt')
    x1 = np.loadtxt(r'..\data\Exam\test\x.txt').T
    y1 = np.loadtxt(r'..\data\Exam\test\y.txt')
    x = np.insert(x, 0, np.ones_like(x[0]), axis=0)
    x1 = np.insert(x1, 0, np.ones_like(x1[0]), axis=0)
    return x, y, x1, y1
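# Hard 0/1 prediction: label 1 where the linear score
# theta[0] + theta[1]*x1 + theta[2]*x2 is positive (sigmoid > 0.5), else 0.
# Expects 2-D inputs, e.g. meshgrid arrays or a (1, n) row of feature values.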
def predict(x1, x2, theta):
    p = theta[0] * np.ones_like(x1) + x1 * theta[1] + x2 * theta[2]
    for (x, y), i in np.ndenumerate(p):
        if p[x, y] > 0:
            p[x, y] = 1
        else:
            p[x, y] = 0
    return p
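# Shade the predicted class regions over a 1000 x 1000 grid spanning the
# training data (plus a margin of 1 on each side) with a two-colour map.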
def fill(x, theta):
    cm_light = mpl.colors.ListedColormap(['#e4f6f5', '#ffcc00'])
    N, M = 1000, 1000
    x1_min, x2_min = np.min(x[1]) - 1, np.min(x[2]) - 1
    x1_max, x2_max = np.max(x[1]) + 1, np.max(x[2]) + 1
    t1 = np.linspace(x1_min, x1_max, N)
    t2 = np.linspace(x2_min, x2_max, M)
    x1, x2 = np.meshgrid(t1, t2)  # generate the grid of sample points
    f = plt.pcolormesh(x1, x2, predict(x1, x2, theta), shading='auto', cmap=cm_light)
    return f
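# Train on the Exam data, then draw a 1x3 figure: loss curve, train/test
# accuracy curves, and the decision regions with the training scatter.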
if __name__ == '__main__':
    x, y, x1, y1 = getData()
    theta = np.random.randn(1, x.shape[0])
    print('initial theta =', theta)
    theta, Loss, Acc, Acc1 = SGD(x, y, x1, y1, np.array(theta), alpha=0.5, maxNum=100000)
    print('final theta =', theta)
    fig = plt.figure(figsize=(10, 4))
    plt.suptitle('Logistic')
    plt.subplot(131)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('Loss Change')
    plt.scatter(np.arange(len(Loss)) * 200, Loss, s=5)  # loss was recorded every 200 iterations
    plt.subplot(132)
    plt.xlabel('iteration')
    plt.ylabel('accuracy')
    plt.title('Accuracy Change')
    plt.scatter(np.arange(len(Acc)) * 1000, Acc, label='train', s=5)  # accuracy was recorded every 1000 iterations
    plt.scatter(np.arange(len(Acc1)) * 1000, Acc1, label='test', s=5)
    plt.legend()
    plt.subplot(133)
    plt.xlabel('feature1')
    plt.ylabel('feature2')
    plt.title('Classification Line')
    # shade the decision regions
    f = fill(x, theta)
    # scatter the training samples
    pos = np.array(np.where(y == 1))
    neg = np.array(np.where(y == 0))
    p1 = plt.scatter(x[1][pos], x[2][pos], marker='+', label='Admitted')
    p2 = plt.scatter(x[1][neg], x[2][neg], marker='^', label='Not admitted')
    # f = -(theta[0] * x[0] + theta[1] * x[1]) / theta[2]
    plt.legend(loc='best')
    plt.show()