import tensorflow.compat.v1 as tf
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from sklearn import metrics
from tensorflow import keras
from sklearn.metrics import confusion_matrix, roc_curve
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
import pandas as pd
# Class labels
list_y = ['WWW','MAIL','FTP-CONTROL','FTP-PASV','ATTACK','P2P',
'DATABASE','FTP-DATA','MULTIMEDIA','SERVICES','INTERACTIVE','GAMES']
# Data preprocessing: read the ARFF files, clean each record, and return samples and labels
def data_preprocess(filenames):
    X, Y = [], []
    data_dir = os.getcwd()
    for fname in filenames:
        print(fname)
        with open(os.path.join(data_dir, fname), 'r') as file:
            # Skip the 253-line ARFF header and iterate over the data records
            for line in file.readlines()[253:]:
                # Convert the Y/N flags to 1 and 0
                line = line.replace('Y', '1')
                line = line.replace('N', '0')
                spl = line.split(',')
                # Skip records with too many missing values
                if spl.count('?') > 8:
                    continue
                # Strip the trailing '\n'
                line = line.replace('\n', '')
                # Mean of the known feature values, used for padding below
                fz = [float(v) for v in line.split(',')[:-1] if v != '?']
                meana = sum(fz) / len(fz)
                # Replace the remaining '?' markers with 0
                line = line.replace('?', str(0))
                # Pad with the mean up to 256 dimensions and add Gaussian white noise,
                # so each record matches the fixed input size of the model
                x = [float(j) for j in line.split(',')[:-1]] + [meana] * 8 + np.random.normal(0, 1, 256)
                # Repair label strings corrupted by the Y/N substitution above
                y = line.split(',')[-1].replace('FTP-CO0TROL', 'FTP-CONTROL')
                y = y.replace('I0TERACTIVE', 'INTERACTIVE')
                y = list_y.index(y)
                X.append(x)
                Y.append(y)
    return X, Y
# Run the preprocessing over all data files to get the samples and labels
total_x,total_y = data_preprocess(['entry01.weka.allclass.arff','entry02.weka.allclass.arff',
'entry03.weka.allclass.arff','entry04.weka.allclass.arff',
'entry05.weka.allclass.arff','entry09.weka.allclass.arff',
'entry10.weka.allclass.arff','entry07.weka.allclass.arff',
'entry08.weka.allclass.arff','entry06.weka.allclass.arff'])
# Split into training and test sets with train_test_split (3:1 train/test, i.e. test_size=0.25)
train_x,test_x,train_y,test_y = train_test_split(total_x,total_y,test_size=0.25, random_state=0)
# Convert the data to tensors with tf.convert_to_tensor
train_x = tf.convert_to_tensor(train_x, dtype=tf.float64)
train_y = tf.convert_to_tensor(train_y, dtype=tf.int64)
test_x = tf.convert_to_tensor(test_x, dtype=tf.float64)
test_y = tf.convert_to_tensor(test_y, dtype=tf.int64)
# Normalize the training and test samples with tf.keras.utils.normalize
train_x = tf.keras.utils.normalize(train_x, axis=1)
test_x = tf.keras.utils.normalize(test_x, axis=1)
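# Optional sanity check (a sketch, not part of the original script): with axis=1 the
# normalization above applies an L2 norm per sample, so every row should now have unit length.
print('Per-sample L2 norms (first 3 test rows):', np.linalg.norm(np.asarray(test_x)[:3], axis=1))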
# Plot the confusion matrix
def plot_confusion_matrix(title, pred_y):
    cm = confusion_matrix(test_y, np.argmax(pred_y, 1))
    labels_name = list_y
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # normalize each row
    plt.imshow(cm, interpolation='nearest')  # show the matrix as an image
    plt.title(title)  # figure title
    plt.colorbar()
    num_local = np.array(range(len(labels_name)))
    plt.xticks(num_local, labels_name, rotation=90)  # class names on the x axis
    plt.yticks(num_local, labels_name)  # class names on the y axis
    plt.ylabel('True')
    plt.xlabel('Predicted')
    plt.show()
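# Optional illustration (a sketch, not in the original script): roc_curve is imported above
# but never used; a one-vs-rest ROC curve per class can be drawn from the softmax scores.
# Assumes pred_y has shape (n_samples, num_classes) and test_y holds integer labels.
def plot_roc_curves(title, pred_y):
    y_true = np.asarray(test_y)
    for idx, name in enumerate(list_y):
        # Binarize the labels for class `idx` (one-vs-rest)
        fpr, tpr, _ = roc_curve((y_true == idx).astype(int), pred_y[:, idx])
        plt.plot(fpr, tpr, label=name)
    plt.plot([0, 1], [0, 1], linestyle='--')  # chance level
    plt.title(title)
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend(fontsize='small')
    plt.show()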
num_classes = 12  # twelve traffic classes, as listed in list_y
num_pixels = 256  # input dimensionality after padding
def DenseBlock():
    t1 = time.time()
    # Build the network
    model = Sequential()
    model.add(layers.Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    # Show the current network structure
    model.summary()
    # Compile the model
    model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # Reshape the input samples
    X_train = tf.reshape(train_x, [-1, num_pixels])
    X_test = tf.reshape(test_x, [-1, num_pixels])
    # Train the model
    history = model.fit(X_train, train_y, validation_split=0.2, epochs=20, batch_size=128, verbose=2)
    # Evaluate the model
    scores = model.evaluate(X_test, test_y, verbose=0)  # scores = [loss, accuracy]
    # Get the model predictions on the test set
    predict_y = model.predict(X_test)
    t2 = time.time()
    # Report accuracy and elapsed time
    print("Accuracy: %.2f%%" % (scores[1] * 100), t2 - t1)
    # Show the confusion matrix
    plot_confusion_matrix("DenseBlock Confusion Matrix", predict_y)
    # Return the trained model
    return model
clf = DenseBlock()
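# Usage sketch (an assumption-laden example, not part of the original script): classify one
# flow from the test set with the trained model and map the predicted index back to its label.
sample = test_x[:1]  # first test sample, shape (1, num_pixels)
probs = clf.predict(sample)
print('Predicted class:', list_y[int(np.argmax(probs, axis=1)[0])])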