| 1 | + | { |
| 2 | + | "cells": [ |
| 3 | + | { |
| 4 | + | "cell_type": "code", |
| 5 | + | "execution_count": 6, |
| 6 | + | "metadata": {}, |
| 7 | + | "outputs": [], |
| 8 | + | "source": [ |
| 9 | + | "import pandas as pd\n", |
| 10 | + | "import numpy as np\n", |
| 11 | + | "import matplotlib" |
| 12 | + | ] |
| 13 | + | }, |
| 14 | + | { |
| 15 | + | "cell_type": "code", |
| 16 | + | "execution_count": 8, |
| 17 | + | "metadata": {}, |
| 18 | + | "outputs": [], |
| 19 | + | "source": [ |
| 20 | + | "import matplotlib.pyplot as plt\n", |
| 21 | + | "from sklearn.pipeline import Pipeline\n", |
| 22 | + | "from sklearn.preprocessing import MinMaxScaler\n", |
| 23 | + | "from sklearn.preprocessing import StandardScaler\n", |
| 24 | + | "from sklearn.preprocessing import OrdinalEncoder\n", |
| 25 | + | "from sklearn.impute import SimpleImputer\n", |
| 26 | + | "from sklearn.preprocessing import OneHotEncoder\n", |
| 27 | + | "from sklearn.compose import ColumnTransformer\n" |
| 28 | + | ] |
| 29 | + | }, |
| 30 | + | { |
| 31 | + | "cell_type": "code", |
| 32 | + | "execution_count": 9, |
| 33 | + | "metadata": {}, |
| 34 | + | "outputs": [], |
| 35 | + | "source": [ |
| 36 | + | "attack_types = {  # maps each raw `label` value to its traffic category\n", |
| 37 | + | " 'normal': 'normal',\n", |
| 38 | + | " # labels grouped under 'DoS'\n", |
| 39 | + | " 'back': 'DoS',\n", |
| 40 | + | " 'land': 'DoS',\n", |
| 41 | + | " 'neptune': 'DoS',\n", |
| 42 | + | " 'pod': 'DoS',\n", |
| 43 | + | " 'smurf': 'DoS',\n", |
| 44 | + | " 'teardrop': 'DoS',\n", |
| 45 | + | " 'mailbomb': 'DoS',\n", |
| 46 | + | " 'apache2': 'DoS',\n", |
| 47 | + | " 'processtable': 'DoS',\n", |
| 48 | + | " 'udpstorm': 'DoS',\n", |
| 49 | + | " # labels grouped under 'Probe'\n", |
| 50 | + | " 'ipsweep': 'Probe',\n", |
| 51 | + | " 'nmap': 'Probe',\n", |
| 52 | + | " 'portsweep': 'Probe',\n", |
| 53 | + | " 'satan': 'Probe',\n", |
| 54 | + | " 'mscan': 'Probe',\n", |
| 55 | + | " 'saint': 'Probe',\n", |
| 56 | + | " # labels grouped under 'R2L'\n", |
| 57 | + | " 'ftp_write': 'R2L',\n", |
| 58 | + | " 'guess_passwd': 'R2L',\n", |
| 59 | + | " 'imap': 'R2L',\n", |
| 60 | + | " 'multihop': 'R2L',\n", |
| 61 | + | " 'phf': 'R2L',\n", |
| 62 | + | " 'spy': 'R2L',\n", |
| 63 | + | " 'warezclient': 'R2L',\n", |
| 64 | + | " 'warezmaster': 'R2L',\n", |
| 65 | + | " 'sendmail': 'R2L',\n", |
| 66 | + | " 'named': 'R2L',\n", |
| 67 | + | " 'snmpgetattack': 'R2L',\n", |
| 68 | + | " 'snmpguess': 'R2L',\n", |
| 69 | + | " 'xlock': 'R2L',\n", |
| 70 | + | " 'xsnoop': 'R2L',\n", |
| 71 | + | " 'worm': 'R2L',\n", |
| 72 | + | " # labels grouped under 'U2R'\n", |
| 73 | + | " 'buffer_overflow': 'U2R',\n", |
| 74 | + | " 'loadmodule': 'U2R',\n", |
| 75 | + | " 'perl': 'U2R',\n", |
| 76 | + | " 'rootkit': 'U2R',\n", |
| 77 | + | " 'httptunnel': 'U2R',\n", |
| 78 | + | " 'ps': 'U2R',\n", |
| 79 | + | " 'sqlattack': 'U2R',\n", |
| 80 | + | " 'xterm': 'U2R'\n", |
| 81 | + | "}\n" |
| 82 | + | ] |
| 83 | + | }, |
| 84 | + | { |
| 85 | + | "cell_type": "code", |
| 86 | + | "execution_count": 10, |
| 87 | + | "metadata": {}, |
| 88 | + | "outputs": [], |
| 89 | + | "source": [ |
| 90 | + | "is_attack = {\n", |
| 91 | + | " \"DoS\":\"attack\",\n", |
| 92 | + | " \"R2L\":\"attack\",\n", |
| 93 | + | " \"U2R\":\"attack\",\n", |
| 94 | + | " \"Probe\":\"attack\",\n", |
| 95 | + | " \"normal\":\"normal\"\n", |
| 96 | + | "}" |
| 97 | + | ] |
| 98 | + | }, |
| 99 | + | { |
| 100 | + | "cell_type": "code", |
| 101 | + | "execution_count": 11, |
| 102 | + | "metadata": {}, |
| 103 | + | "outputs": [], |
| 104 | + | "source": [ |
| 105 | + | "kdd_path_pkl = \"NSL_KDD/\"  # directory the exported CSV is written to\n", |
| 106 | + | "kdd_path = \"NSL_KDD/\"  # directory the KDDTrain+/KDDTest+ text files are read from" |
| 107 | + | ] |
| 108 | + | }, |
| 109 | + | { |
| 110 | + | "cell_type": "code", |
| 111 | + | "execution_count": 12, |
| 112 | + | "metadata": {}, |
| 113 | + | "outputs": [], |
| 114 | + | "source": [ |
| 115 | + | "class read_data:\n", |
| 116 | + | " col_names = [\"duration\",\"protocol_type\",\"service\",\"flag\",\"src_bytes\",\n", |
| 117 | + | " \"dst_bytes\",\"land\",\"wrong_fragment\",\"urgent\",\"hot\",\"num_failed_logins\",\n", |
| 118 | + | " \"logged_in\",\"num_compromised\",\"root_shell\",\"su_attempted\",\"num_root\",\n", |
| 119 | + | " \"num_file_creations\",\"num_shells\",\"num_access_files\",\"num_outbound_cmds\",\n", |
| 120 | + | " \"is_host_login\",\"is_guest_login\",\"count\",\"srv_count\",\"serror_rate\",\n", |
| 121 | + | " \"srv_serror_rate\",\"rerror_rate\",\"srv_rerror_rate\",\"same_srv_rate\",\n", |
| 122 | + | " \"diff_srv_rate\",\"srv_diff_host_rate\",\"dst_host_count\",\"dst_host_srv_count\",\n", |
| 123 | + | " \"dst_host_same_srv_rate\",\"dst_host_diff_srv_rate\",\"dst_host_same_src_port_rate\",\n", |
| 124 | + | " \"dst_host_srv_diff_host_rate\",\"dst_host_serror_rate\",\"dst_host_srv_serror_rate\",\n", |
| 125 | + | " \"dst_host_rerror_rate\",\"dst_host_srv_rerror_rate\",\"label\", \"difficulty_level\"]\n", |
| 126 | + | "\n", |
| 127 | + | " KDDTrain = pd.read_csv(kdd_path+\"KDDTrain+.txt\",names = col_names,)\n", |
| 128 | + | " KDDTest = pd.read_csv(kdd_path+\"KDDTest+.txt\",names = col_names,)\n", |
| 129 | + | "\n", |
| 130 | + | "\n", |
| 131 | + | " KDDAll = pd.concat([KDDTrain, KDDTest])\n", |
| 132 | + | "\n", |
| 133 | + | " kdd_diff_level_all = KDDAll[\"difficulty_level\"].copy()\n", |
| 134 | + | " kdd_diff_level_train = KDDTrain[\"difficulty_level\"].copy()\n", |
| 135 | + | " kdd_diff_level_test = KDDTest[\"difficulty_level\"].copy()\n", |
| 136 | + | "\n", |
| 137 | + | " KDDAll = KDDAll.drop(\"difficulty_level\", axis = 1)\n", |
| 138 | + | "\n", |
| 139 | + | " KDDTrain.to_csv(kdd_path_pkl+\"KDDAll+.csv\")\n", |
| 140 | + | "\n", |
| 141 | + | " KDDTrain_len = KDDTrain.shape[0]\n", |
| 142 | + | " KDDTest_len = KDDTest.shape[0]\n", |
| 143 | + | "\n", |
| 144 | + | " KDDAll[\"type\"] = KDDAll.label.map(lambda x: attack_types[x])\n", |
| 145 | + | " KDDAll[\"isa\"] = KDDAll.type.map(lambda x: is_attack[x])\n", |
| 146 | + | "\n", |
| 147 | + | " KDDTrain[\"type\"] = KDDTrain.label.map(lambda x: attack_types[x])\n", |
| 148 | + | " KDDTrain[\"isa\"] = KDDTrain.type.map(lambda x: is_attack[x])\n", |
| 149 | + | "\n", |
| 150 | + | " KDDTest[\"type\"] = KDDTest.label.map(lambda x: attack_types[x])\n", |
| 151 | + | " KDDTest[\"isa\"] = KDDTest.type.map(lambda x: is_attack[x])\n", |
| 152 | + | "\n", |
| 153 | + | " kdd_attack_type_group = KDDAll.groupby(\"type\")\n", |
| 154 | + | " kdd_is_attack_group = KDDAll.groupby(\"isa\")\n", |
| 155 | + | "\n", |
| 156 | + | " kdd_attack_type_group.type.count()\n", |
| 157 | + | " kdd_is_attack_group[\"isa\"].count()\n", |
| 158 | + | "\n", |
| 159 | + | " KDDAll_is = KDDAll.copy()\n", |
| 160 | + | " KDDAll_type = KDDAll.copy()\n", |
| 161 | + | "\n", |
| 162 | + | " KDDAll_is_y = KDDAll[\"isa\"].copy()\n", |
| 163 | + | " KDDAll_is.drop([\"label\", \"isa\"], axis=1, inplace=True)\n", |
| 164 | + | " KDDAll_type_y = KDDAll[\"type\"].copy()\n", |
| 165 | + | " KDDAll_type.drop([\"label\", \"type\"], axis=1, inplace=True)\n", |
| 166 | + | "\n", |
| 167 | + | " KDDTrain_is_y = KDDTrain[\"isa\"].copy()\n", |
| 168 | + | " KDDTrain_type_y = KDDTrain[\"type\"].copy()\n", |
| 169 | + | "\n", |
| 170 | + | " KDDTest_is_y = KDDTest[\"isa\"].copy()\n", |
| 171 | + | " KDDTest_type_y = KDDTest[\"type\"].copy()\n", |
| 172 | + | "\n", |
| 173 | + | " class_mapping = {'attack': 0, 'normal': 1}\n", |
| 174 | + | " Y_Train = KDDTrain_is_y.map(class_mapping)\n", |
| 175 | + | " Y_Test = KDDTest_is_y.map(class_mapping)\n" |
| 176 | + | ] |
| 177 | + | }, |
| 178 | + | { |
| 179 | + | "cell_type": "code", |
| 180 | + | "execution_count": 14, |
| 181 | + | "metadata": {}, |
| 182 | + | "outputs": [], |
| 183 | + | "source": [ |
| 184 | + | "class preprocess_data:\n", |
| 185 | + | "\n", |
| 186 | + | " col_names_onehot = [\"protocol_type\",\"service\",\"flag\", \"type\"]\n", |
| 187 | + | " col_names_onehot_s = [\"protocol_type\",\"service\",\"flag\",\"type\"]\n", |
| 188 | + | " KDDAll_num = read_data.KDDAll_is.drop(col_names_onehot, axis=1) #pd\n", |
| 189 | + | " KDDAll_onehot_s = read_data.KDDAll_is[ col_names_onehot_s] #pd\n", |
| 190 | + | "\n", |
| 191 | + | " num_pipeline = Pipeline([('scaling', StandardScaler())])\n", |
| 192 | + | " cat_string_pipeline = Pipeline([('imputer', SimpleImputer(strategy = \"constant\", fill_value = \"missing\")), ('ordi', OrdinalEncoder()), ('onehots', OneHotEncoder(categories='auto'))])\n", |
| 193 | + | "\n", |
| 194 | + | " num_attribs = list(KDDAll_num)\n", |
| 195 | + | " cat_s_attribs = list(KDDAll_onehot_s)\n", |
| 196 | + | "\n", |
| 197 | + | " full_pipeline = ColumnTransformer([(\"num\", num_pipeline, num_attribs), (\"cats\", cat_string_pipeline, cat_s_attribs)])\n", |
| 198 | + | "\n", |
| 199 | + | " KDDAll_t = full_pipeline.fit_transform(read_data.KDDAll_is)\n", |
| 200 | + | "\n", |
| 201 | + | " X_Train = KDDAll_t[:read_data.KDDTrain_len]\n", |
| 202 | + | " X_Test = KDDAll_t[read_data.KDDTrain_len:read_data.KDDTrain_len + read_data.KDDTest_len]\n" |
| 203 | + | ] |
| 204 | + | }, |
| 205 | + | { |
| 206 | + | "cell_type": "code", |
| 207 | + | "execution_count": 15, |
| 208 | + | "metadata": {}, |
| 209 | + | "outputs": [], |
| 210 | + | "source": [ |
| 211 | + | "import tensorflow as tf\n", |
| 212 | + | "from tensorflow.keras.layers import Dense\n", |
| 213 | + | "from tensorflow.keras import optimizers\n", |
| 214 | + | "from tensorflow.keras import models\n", |
| 215 | + | "from tensorflow.keras import layers\n", |
| 216 | + | "from tensorflow.keras import wrappers\n", |
| 217 | + | "from tensorflow.keras import initializers\n", |
| 218 | + | "from tensorflow.keras import regularizers\n", |
| 219 | + | "from tensorflow.keras import losses\n", |
| 220 | + | "from scipy.stats import reciprocal\n", |
| 221 | + | "from sklearn.model_selection import RandomizedSearchCV\n", |
| 222 | + | "from tensorflow.keras.wrappers.scikit_learn import KerasClassifier\n", |
| 223 | + | "from sklearn.metrics import classification_report\n", |
| 224 | + | "import time" |
| 225 | + | ] |
| 226 | + | }, |
| 227 | + | { |
| 228 | + | "cell_type": "code", |
| 229 | + | "execution_count": 16, |
| 230 | + | "metadata": {}, |
| 231 | + | "outputs": [], |
| 232 | + | "source": [ |
| 233 | + | "features_dim = preprocess_data.X_Train.shape[1]  # column count of the transformed training matrix; used as the network input size" |
| 234 | + | ] |
| 235 | + | }, |
| 236 | + | { |
| 237 | + | "cell_type": "code", |
| 238 | + | "execution_count": 19, |
| 239 | + | "metadata": {}, |
| 240 | + | "outputs": [], |
| 241 | + | "source": [ |
| 242 | + | "def build_model(learning_rate,mt,regrt, lay1, lay2, lay3, initiali):\n", |
| 243 | + | " model = models.Sequential([\n", |
| 244 | + | " layers.Dense(units=lay1, input_shape=(features_dim,), activation=\"relu\", kernel_initializer=initiali, bias_initializer='zeros'),\n", |
| 245 | + | " layers.Dropout(0.2),\n", |
| 246 | + | " layers.Dense(units=lay2, activation=\"relu\"),\n", |
| 247 | + | " layers.Dropout(0.2),\n", |
| 248 | + | " layers.Dense(units=lay3, activation=\"relu\"),\n", |
| 249 | + | " layers.Dropout(0.2),\n", |
| 250 | + | " layers.Dense(1, activation=\"sigmoid\")])\n", |
| 251 | + | " optRMS = optimizers.RMSprop(lr=learning_rate)\n", |
| 252 | + | " model.compile(loss='binary_crossentropy', optimizer='RMSprop')\n", |
| 253 | + | " return model\n" |
| 254 | + | ] |
| 255 | + | }, |
| 256 | + | { |
| 257 | + | "cell_type": "code", |
| 258 | + | "execution_count": 20, |
| 259 | + | "metadata": {}, |
| 260 | + | "outputs": [], |
| 261 | + | "source": [ |
| 262 | + | "keras_reg = wrappers.scikit_learn.KerasClassifier(build_model)\n", |
| 263 | + | "\n", |
| 264 | + | "param_distribs = {\"learning_rate\": reciprocal(0.0001, 0.0005), \"mt\": reciprocal(0.9, 0.94), \"regrt\":[0.001, 0.01, 0.1],\n", |
| 265 | + | "\t\t\t\t\t\"lay1\":[256,128], \"lay2\":[64,32], \"lay3\":[32,16],\n", |
| 266 | + | "\t\t\t\t\t\"initiali\":['glorot_uniform', 'he_uniform']\t}\n", |
| 267 | + | "# keras.layers.BatchNormalization(momentum=0.9),\n", |
| 268 | + | "\n", |
| 269 | + | "rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, cv=5, scoring='f1_macro', n_jobs=-1, error_score=1)\n" |
| 270 | + | ] |
| 271 | + | }, |
| 272 | + | { |
| 273 | + | "cell_type": "code", |
| 274 | + | "execution_count": 21, |
| 275 | + | "metadata": {}, |
| 276 | + | "outputs": [], |
| 277 | + | "source": [ |
| 278 | + | "X_Train = preprocess_data.X_Train\n", |
| 279 | + | "Y_Train = read_data.Y_Train\n", |
| 280 | + | "\n", |
| 281 | + | "X_Test = preprocess_data.X_Test\n", |
| 282 | + | "Y_Test = read_data.Y_Test" |
| 283 | + | ] |
| 284 | + | }, |
| 285 | + | { |
| 286 | + | "cell_type": "code", |
| 287 | + | "execution_count": 22, |
| 288 | + | "metadata": {}, |
| 289 | + | "outputs": [], |
| 290 | + | "source": [ |
| 291 | + | "batch_s = 2000  # passed to the search's fit() as batch_size\n", |
| 292 | + | "epoches = 75  # passed to the search's fit() as epochs\n", |
| 293 | + | "ver = 2  # passed to the search's fit() as verbose" |
| 294 | + | ] |
| 295 | + | }, |
| 296 | + | { |
| 297 | + | "cell_type": "code", |
| 298 | + | "execution_count": null, |
| 299 | + | "metadata": {}, |
| 300 | + | "outputs": [], |
| 301 | + | "source": [ |
| 302 | + | "rnd_search_cv.fit(X_Train, Y_Train, batch_size=batch_s, epochs=epoches, verbose=ver)  # runs the randomized search; batch_size/epochs/verbose are given as extra fit parameters" |
| 303 | + | ] |
| 304 | + | }, |
| 305 | + | { |
| 306 | + | "cell_type": "markdown", |
| 307 | + | "metadata": {}, |
| 308 | + | "source": [ |
| 309 | + | "# ASD on MEC: Model is already pre-trained" |
| 310 | + | ] |
| 311 | + | }, |
| 312 | + | { |
| 313 | + | "cell_type": "markdown", |
| 314 | + | "metadata": {}, |
| 315 | + | "source": [ |
| 316 | + | "- **Model**: fully connected neural network with dropout 0.2, RMSprop optimizer and binary cross-entropy loss\n", |
| 317 | + | "- **Epochs**: 75\n", |
| 318 | + | "- **Batch size**: 2000 connections per step" |
| 319 | + | ] |
| 320 | + | }, |
| 321 | + | { |
| 322 | + | "cell_type": "markdown", |
| 323 | + | "metadata": {}, |
| 324 | + | "source": [ |
| 325 | + | "## Start Prediction" |
| 326 | + | ] |
| 327 | + | }, |
| 328 | + | { |
| 329 | + | "cell_type": "code", |
| 330 | + | "execution_count": null, |
| 331 | + | "metadata": {}, |
| 332 | + | "outputs": [], |
| 333 | + | "source": [ |
| 334 | + | "pred_test = rnd_search_cv.predict(X_Test)  # predictions of the fitted search object on the test matrix" |
| 335 | + | ] |
| 336 | + | }, |
| 337 | + | { |
| 338 | + | "cell_type": "markdown", |
| 339 | + | "metadata": {}, |
| 340 | + | "source": [ |
| 341 | + | "Classification report ready" |
| 342 | + | ] |
| 343 | + | }, |
| 344 | + | { |
| 345 | + | "cell_type": "code", |
| 346 | + | "execution_count": null, |
| 347 | + | "metadata": {}, |
| 348 | + | "outputs": [], |
| 349 | + | "source": [ |
| 350 | + | "print(\"Classclassification_report: \\n\", classification_report(Y_Test, pred_test))" |
| 351 | + | ] |
| 352 | + | }, |
| 353 | + | { |
| 354 | + | "cell_type": "code", |
| 355 | + | "execution_count": null, |
| 356 | + | "metadata": {}, |
| 357 | + | "outputs": [], |
| 358 | + | "source": [ |
| 359 | + | "print(\"Best estimator: \n\", rnd_search_cv.best_estimator_)  # summary of the fitted search, one attribute per line\n", |
| 360 | + | "print(\"Best score: \n\", rnd_search_cv.best_score_)  # scored with the search's scoring='f1_macro'\n", |
| 361 | + | "print(\"Best params: \n\", rnd_search_cv.best_params_)\n", |
| 362 | + | "print(\"Refit time: \n\", rnd_search_cv.refit_time_)" |
| 363 | + | ] |
| 364 | + | }, |
| 365 | + | { |
| 366 | + | "cell_type": "markdown", |
| 367 | + | "metadata": {}, |
| 368 | + | "source": [ |
| 369 | + | "Plot Confusion Matrix Graph" |
| 370 | + | ] |
| 371 | + | }, |
| 372 | + | { |
| 373 | + | "cell_type": "code", |
| 374 | + | "execution_count": null, |
| 375 | + | "metadata": {}, |
| 376 | + | "outputs": [], |
| 377 | + | "source": [ |
| 378 | + | "from sklearn.metrics import confusion_matrix\n", |
| 379 | + | "cm = confusion_matrix(Y_Test, pred_test)  # true labels first, predictions second\n", |
| 380 | + | "import itertools\n", |
| 381 | + | "classes = ['attack','normal']  # tick labels, in the order of the class codes (attack=0, normal=1)\n", |
| 382 | + | "plt.figure()\n", |
| 383 | + | "plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n", |
| 384 | + | "plt.title('Confusion matrix')\n", |
| 385 | + | "plt.colorbar()\n", |
| 386 | + | "tick_marks = np.arange(len(classes))\n", |
| 387 | + | "plt.xticks(tick_marks, classes, rotation=45)\n", |
| 388 | + | "plt.yticks(tick_marks, classes)\n", |
| 389 | + | "print(cm)\n", |
| 390 | + | "thresh = cm.max() / 2.  # cells above half the maximum count get white text, the rest black\n", |
| 391 | + | "for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):  # annotate every cell with its count\n", |
| 392 | + | " plt.text(j, i, cm[i, j].round(4),\n", |
| 393 | + | " horizontalalignment=\"center\",\n", |
| 394 | + | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", |
| 395 | + | "\n", |
| 396 | + | "plt.tight_layout()\n", |
| 397 | + | "plt.ylabel('True label')\n", |
| 398 | + | "plt.xlabel('Predicted label')" |
| 399 | + | ] |
| 400 | + | } |
| 401 | + | ], |
| 402 | + | "metadata": { |
| 403 | + | "kernelspec": { |
| 404 | + | "display_name": "Python 3", |
| 405 | + | "language": "python", |
| 406 | + | "name": "python3" |
| 407 | + | }, |
| 408 | + | "language_info": { |
| 409 | + | "codemirror_mode": { |
| 410 | + | "name": "ipython", |
| 411 | + | "version": 3 |
| 412 | + | }, |
| 413 | + | "file_extension": ".py", |
| 414 | + | "mimetype": "text/x-python", |
| 415 | + | "name": "python", |
| 416 | + | "nbconvert_exporter": "python", |
| 417 | + | "pygments_lexer": "ipython3", |
| 418 | + | "version": "3.7.3" |
| 419 | + | } |
| 420 | + | }, |
| 421 | + | "nbformat": 4, |
| 422 | + | "nbformat_minor": 2 |
| 423 | + | } |
| 424 | + | |