🤬
  • Adding jupyter notebook for ASD microservice

  • Loading...
  • karalis committed 4 years ago
    35082997
    1 parent ca2df20c
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    ASD_dnn.ipynb
     1 +{
     2 + "cells": [
     3 + {
     4 + "cell_type": "code",
     5 + "execution_count": 6,
     6 + "metadata": {},
     7 + "outputs": [],
     8 + "source": [
     9 + "import pandas as pd\n",
     10 + "import numpy as np\n",
     11 + "import matplotlib"
     12 + ]
     13 + },
     14 + {
     15 + "cell_type": "code",
     16 + "execution_count": 8,
     17 + "metadata": {},
     18 + "outputs": [],
     19 + "source": [
     20 + "import matplotlib.pyplot as plt\n",
     21 + "from sklearn.pipeline import Pipeline\n",
     22 + "from sklearn.preprocessing import MinMaxScaler\n",
     23 + "from sklearn.preprocessing import StandardScaler\n",
     24 + "from sklearn.preprocessing import OrdinalEncoder\n",
     25 + "from sklearn.impute import SimpleImputer\n",
     26 + "from sklearn.preprocessing import OneHotEncoder\n",
     27 + "from sklearn.compose import ColumnTransformer\n"
     28 + ]
     29 + },
     30 + {
     31 + "cell_type": "code",
     32 + "execution_count": 9,
     33 + "metadata": {},
     34 + "outputs": [],
     35 + "source": [
     36 + "attack_types = {\n",
     37 + " 'normal': 'normal',\n",
     38 + "\n",
     39 + " 'back': 'DoS',\n",
     40 + " 'land': 'DoS',\n",
     41 + " 'neptune': 'DoS',\n",
     42 + " 'pod': 'DoS',\n",
     43 + " 'smurf': 'DoS',\n",
     44 + " 'teardrop': 'DoS',\n",
     45 + " 'mailbomb': 'DoS',\n",
     46 + " 'apache2': 'DoS',\n",
     47 + " 'processtable': 'DoS',\n",
     48 + " 'udpstorm': 'DoS',\n",
     49 + "\n",
     50 + " 'ipsweep': 'Probe',\n",
     51 + " 'nmap': 'Probe',\n",
     52 + " 'portsweep': 'Probe',\n",
     53 + " 'satan': 'Probe',\n",
     54 + " 'mscan': 'Probe',\n",
     55 + " 'saint': 'Probe',\n",
     56 + "\n",
     57 + " 'ftp_write': 'R2L',\n",
     58 + " 'guess_passwd': 'R2L',\n",
     59 + " 'imap': 'R2L',\n",
     60 + " 'multihop': 'R2L',\n",
     61 + " 'phf': 'R2L',\n",
     62 + " 'spy': 'R2L',\n",
     63 + " 'warezclient': 'R2L',\n",
     64 + " 'warezmaster': 'R2L',\n",
     65 + " 'sendmail': 'R2L',\n",
     66 + " 'named': 'R2L',\n",
     67 + " 'snmpgetattack': 'R2L',\n",
     68 + " 'snmpguess': 'R2L',\n",
     69 + " 'xlock': 'R2L',\n",
     70 + " 'xsnoop': 'R2L',\n",
     71 + " 'worm': 'R2L',\n",
     72 + "\n",
     73 + " 'buffer_overflow': 'U2R',\n",
     74 + " 'loadmodule': 'U2R',\n",
     75 + " 'perl': 'U2R',\n",
     76 + " 'rootkit': 'U2R',\n",
     77 + " 'httptunnel': 'U2R',\n",
     78 + " 'ps': 'U2R',\n",
     79 + " 'sqlattack': 'U2R',\n",
     80 + " 'xterm': 'U2R'\n",
     81 + "}\n"
     82 + ]
     83 + },
     84 + {
     85 + "cell_type": "code",
     86 + "execution_count": 10,
     87 + "metadata": {},
     88 + "outputs": [],
     89 + "source": [
     90 + "is_attack = {\n",
     91 + " \"DoS\":\"attack\",\n",
     92 + " \"R2L\":\"attack\",\n",
     93 + " \"U2R\":\"attack\",\n",
     94 + " \"Probe\":\"attack\",\n",
     95 + " \"normal\":\"normal\"\n",
     96 + "}"
     97 + ]
     98 + },
     99 + {
     100 + "cell_type": "code",
     101 + "execution_count": 11,
     102 + "metadata": {},
     103 + "outputs": [],
     104 + "source": [
     105 + "kdd_path_pkl = \"NSL_KDD/\"\n",
     106 + "kdd_path = \"NSL_KDD/\""
     107 + ]
     108 + },
     109 + {
     110 + "cell_type": "code",
     111 + "execution_count": 12,
     112 + "metadata": {},
     113 + "outputs": [],
     114 + "source": [
     115 + "class read_data:\n",
     116 + " col_names = [\"duration\",\"protocol_type\",\"service\",\"flag\",\"src_bytes\",\n",
     117 + " \"dst_bytes\",\"land\",\"wrong_fragment\",\"urgent\",\"hot\",\"num_failed_logins\",\n",
     118 + " \"logged_in\",\"num_compromised\",\"root_shell\",\"su_attempted\",\"num_root\",\n",
     119 + " \"num_file_creations\",\"num_shells\",\"num_access_files\",\"num_outbound_cmds\",\n",
     120 + " \"is_host_login\",\"is_guest_login\",\"count\",\"srv_count\",\"serror_rate\",\n",
     121 + " \"srv_serror_rate\",\"rerror_rate\",\"srv_rerror_rate\",\"same_srv_rate\",\n",
     122 + " \"diff_srv_rate\",\"srv_diff_host_rate\",\"dst_host_count\",\"dst_host_srv_count\",\n",
     123 + " \"dst_host_same_srv_rate\",\"dst_host_diff_srv_rate\",\"dst_host_same_src_port_rate\",\n",
     124 + " \"dst_host_srv_diff_host_rate\",\"dst_host_serror_rate\",\"dst_host_srv_serror_rate\",\n",
     125 + " \"dst_host_rerror_rate\",\"dst_host_srv_rerror_rate\",\"label\", \"difficulty_level\"]\n",
     126 + "\n",
     127 + " KDDTrain = pd.read_csv(kdd_path+\"KDDTrain+.txt\",names = col_names,)\n",
     128 + " KDDTest = pd.read_csv(kdd_path+\"KDDTest+.txt\",names = col_names,)\n",
     129 + "\n",
     130 + "\n",
     131 + " KDDAll = pd.concat([KDDTrain, KDDTest])\n",
     132 + "\n",
     133 + " kdd_diff_level_all = KDDAll[\"difficulty_level\"].copy()\n",
     134 + " kdd_diff_level_train = KDDTrain[\"difficulty_level\"].copy()\n",
     135 + " kdd_diff_level_test = KDDTest[\"difficulty_level\"].copy()\n",
     136 + "\n",
     137 + " KDDAll = KDDAll.drop(\"difficulty_level\", axis = 1)\n",
     138 + "\n",
     139 + " KDDTrain.to_csv(kdd_path_pkl+\"KDDAll+.csv\")\n",
     140 + "\n",
     141 + " KDDTrain_len = KDDTrain.shape[0]\n",
     142 + " KDDTest_len = KDDTest.shape[0]\n",
     143 + "\n",
     144 + " KDDAll[\"type\"] = KDDAll.label.map(lambda x: attack_types[x])\n",
     145 + " KDDAll[\"isa\"] = KDDAll.type.map(lambda x: is_attack[x])\n",
     146 + "\n",
     147 + " KDDTrain[\"type\"] = KDDTrain.label.map(lambda x: attack_types[x])\n",
     148 + " KDDTrain[\"isa\"] = KDDTrain.type.map(lambda x: is_attack[x])\n",
     149 + "\n",
     150 + " KDDTest[\"type\"] = KDDTest.label.map(lambda x: attack_types[x])\n",
     151 + " KDDTest[\"isa\"] = KDDTest.type.map(lambda x: is_attack[x])\n",
     152 + "\n",
     153 + " kdd_attack_type_group = KDDAll.groupby(\"type\")\n",
     154 + " kdd_is_attack_group = KDDAll.groupby(\"isa\")\n",
     155 + "\n",
     156 + " kdd_attack_type_group.type.count()\n",
     157 + " kdd_is_attack_group[\"isa\"].count()\n",
     158 + "\n",
     159 + " KDDAll_is = KDDAll.copy()\n",
     160 + " KDDAll_type = KDDAll.copy()\n",
     161 + "\n",
     162 + " KDDAll_is_y = KDDAll[\"isa\"].copy()\n",
     163 + " KDDAll_is.drop([\"label\", \"isa\"], axis=1, inplace=True)\n",
     164 + " KDDAll_type_y = KDDAll[\"type\"].copy()\n",
     165 + " KDDAll_type.drop([\"label\", \"type\"], axis=1, inplace=True)\n",
     166 + "\n",
     167 + " KDDTrain_is_y = KDDTrain[\"isa\"].copy()\n",
     168 + " KDDTrain_type_y = KDDTrain[\"type\"].copy()\n",
     169 + "\n",
     170 + " KDDTest_is_y = KDDTest[\"isa\"].copy()\n",
     171 + " KDDTest_type_y = KDDTest[\"type\"].copy()\n",
     172 + "\n",
     173 + " class_mapping = {'attack': 0, 'normal': 1}\n",
     174 + " Y_Train = KDDTrain_is_y.map(class_mapping)\n",
     175 + " Y_Test = KDDTest_is_y.map(class_mapping)\n"
     176 + ]
     177 + },
     178 + {
     179 + "cell_type": "code",
     180 + "execution_count": 14,
     181 + "metadata": {},
     182 + "outputs": [],
     183 + "source": [
     184 + "class preprocess_data:\n",
     185 + "\n",
     186 + " col_names_onehot = [\"protocol_type\",\"service\",\"flag\", \"type\"]\n",
     187 + " col_names_onehot_s = [\"protocol_type\",\"service\",\"flag\",\"type\"]\n",
     188 + " KDDAll_num = read_data.KDDAll_is.drop(col_names_onehot, axis=1) #pd\n",
     189 + " KDDAll_onehot_s = read_data.KDDAll_is[ col_names_onehot_s] #pd\n",
     190 + "\n",
     191 + " num_pipeline = Pipeline([('scaling', StandardScaler())])\n",
     192 + " cat_string_pipeline = Pipeline([('imputer', SimpleImputer(strategy = \"constant\", fill_value = \"missing\")), ('ordi', OrdinalEncoder()), ('onehots', OneHotEncoder(categories='auto'))])\n",
     193 + "\n",
     194 + " num_attribs = list(KDDAll_num)\n",
     195 + " cat_s_attribs = list(KDDAll_onehot_s)\n",
     196 + "\n",
     197 + " full_pipeline = ColumnTransformer([(\"num\", num_pipeline, num_attribs), (\"cats\", cat_string_pipeline, cat_s_attribs)])\n",
     198 + "\n",
     199 + " KDDAll_t = full_pipeline.fit_transform(read_data.KDDAll_is)\n",
     200 + "\n",
     201 + " X_Train = KDDAll_t[:read_data.KDDTrain_len]\n",
     202 + " X_Test = KDDAll_t[read_data.KDDTrain_len:read_data.KDDTrain_len + read_data.KDDTest_len]\n"
     203 + ]
     204 + },
     205 + {
     206 + "cell_type": "code",
     207 + "execution_count": 15,
     208 + "metadata": {},
     209 + "outputs": [],
     210 + "source": [
     211 + "import tensorflow as tf\n",
     212 + "from tensorflow.keras.layers import Dense\n",
     213 + "from tensorflow.keras import optimizers\n",
     214 + "from tensorflow.keras import models\n",
     215 + "from tensorflow.keras import layers\n",
     216 + "from tensorflow.keras import wrappers\n",
     217 + "from tensorflow.keras import initializers\n",
     218 + "from tensorflow.keras import regularizers\n",
     219 + "from tensorflow.keras import losses\n",
     220 + "from scipy.stats import reciprocal\n",
     221 + "from sklearn.model_selection import RandomizedSearchCV\n",
     222 + "from tensorflow.keras.wrappers.scikit_learn import KerasClassifier\n",
     223 + "from sklearn.metrics import classification_report\n",
     224 + "import time"
     225 + ]
     226 + },
     227 + {
     228 + "cell_type": "code",
     229 + "execution_count": 16,
     230 + "metadata": {},
     231 + "outputs": [],
     232 + "source": [
     233 + "features_dim = preprocess_data.X_Train.shape[1]"
     234 + ]
     235 + },
     236 + {
     237 + "cell_type": "code",
     238 + "execution_count": 19,
     239 + "metadata": {},
     240 + "outputs": [],
     241 + "source": [
     242 + "def build_model(learning_rate,mt,regrt, lay1, lay2, lay3, initiali):\n",
     243 + " model = models.Sequential([\n",
     244 + " layers.Dense(units=lay1, input_shape=(features_dim,), activation=\"relu\", kernel_initializer=initiali, bias_initializer='zeros'),\n",
     245 + " layers.Dropout(0.2),\n",
     246 + " layers.Dense(units=lay2, activation=\"relu\"),\n",
     247 + " layers.Dropout(0.2),\n",
     248 + " layers.Dense(units=lay3, activation=\"relu\"),\n",
     249 + " layers.Dropout(0.2),\n",
     250 + " layers.Dense(1, activation=\"sigmoid\")])\n",
     251 + " optRMS = optimizers.RMSprop(lr=learning_rate)\n",
     252 + " model.compile(loss='binary_crossentropy', optimizer='RMSprop')\n",
     253 + " return model\n"
     254 + ]
     255 + },
     256 + {
     257 + "cell_type": "code",
     258 + "execution_count": 20,
     259 + "metadata": {},
     260 + "outputs": [],
     261 + "source": [
     262 + "keras_reg = wrappers.scikit_learn.KerasClassifier(build_model)\n",
     263 + "\n",
     264 + "param_distribs = {\"learning_rate\": reciprocal(0.0001, 0.0005), \"mt\": reciprocal(0.9, 0.94), \"regrt\":[0.001, 0.01, 0.1],\n",
     265 + "\t\t\t\t\t\"lay1\":[256,128], \"lay2\":[64,32], \"lay3\":[32,16],\n",
     266 + "\t\t\t\t\t\"initiali\":['glorot_uniform', 'he_uniform']\t}\n",
     267 + "# keras.layers.BatchNormalization(momentum=0.9),\n",
     268 + "\n",
     269 + "rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, cv=5, scoring='f1_macro', n_jobs=-1, error_score=1)\n"
     270 + ]
     271 + },
     272 + {
     273 + "cell_type": "code",
     274 + "execution_count": 21,
     275 + "metadata": {},
     276 + "outputs": [],
     277 + "source": [
     278 + "X_Train = preprocess_data.X_Train\n",
     279 + "Y_Train = read_data.Y_Train\n",
     280 + "\n",
     281 + "X_Test = preprocess_data.X_Test\n",
     282 + "Y_Test = read_data.Y_Test"
     283 + ]
     284 + },
     285 + {
     286 + "cell_type": "code",
     287 + "execution_count": 22,
     288 + "metadata": {},
     289 + "outputs": [],
     290 + "source": [
     291 + "batch_s = 2000\n",
     292 + "epoches = 75\n",
     293 + "ver = 2"
     294 + ]
     295 + },
     296 + {
     297 + "cell_type": "code",
     298 + "execution_count": null,
     299 + "metadata": {},
     300 + "outputs": [],
     301 + "source": [
     302 + "rnd_search_cv.fit(X_Train, Y_Train, batch_size=batch_s, epochs=epoches, verbose=ver)"
     303 + ]
     304 + },
     305 + {
     306 + "cell_type": "markdown",
     307 + "metadata": {},
     308 + "source": [
     309 + "# ASD on MEC: Model is already pre-trained"
     310 + ]
     311 + },
     312 + {
     313 + "cell_type": "markdown",
     314 + "metadata": {},
     315 + "source": [
     316 + "Model: fully connected neural network with dropout 0.2, RMSprop optimizer and binary cross-entropy loss.  \n",
     317 + "Epochs: 75.  \n",
     318 + "Batch size: 2000 connections per step."
     319 + ]
     320 + },
     321 + {
     322 + "cell_type": "markdown",
     323 + "metadata": {},
     324 + "source": [
     325 + "## Start Prediction"
     326 + ]
     327 + },
     328 + {
     329 + "cell_type": "code",
     330 + "execution_count": null,
     331 + "metadata": {},
     332 + "outputs": [],
     333 + "source": [
     334 + "pred_test = rnd_search_cv.predict(X_Test)"
     335 + ]
     336 + },
     337 + {
     338 + "cell_type": "markdown",
     339 + "metadata": {},
     340 + "source": [
     341 + "Classification report for the test set"
     342 + ]
     343 + },
     344 + {
     345 + "cell_type": "code",
     346 + "execution_count": null,
     347 + "metadata": {},
     348 + "outputs": [],
     349 + "source": [
     350 + "print(\"Classclassification_report: \\n\", classification_report(Y_Test, pred_test))"
     351 + ]
     352 + },
     353 + {
     354 + "cell_type": "code",
     355 + "execution_count": null,
     356 + "metadata": {},
     357 + "outputs": [],
     358 + "source": [
     359 + "print(\"Best estimator: \\n\", rnd_search_cv.best_estimator_)\n",
     360 + "print(\"Best score: \\n\", rnd_search_cv.best_score_)\n",
     361 + "print(\"Best params: \\n\", rnd_search_cv.best_params_)\n",
     362 + "print(\"Refit time: \\n\", rnd_search_cv.refit_time_)"
     363 + ]
     364 + },
     365 + {
     366 + "cell_type": "markdown",
     367 + "metadata": {},
     368 + "source": [
     369 + "Plot Confusion Matrix Graph"
     370 + ]
     371 + },
     372 + {
     373 + "cell_type": "code",
     374 + "execution_count": null,
     375 + "metadata": {},
     376 + "outputs": [],
     377 + "source": [
     378 + "from sklearn.metrics import confusion_matrix\n",
     379 + "cm = confusion_matrix(Y_Test, pred_test)\n",
     380 + "import itertools\n",
     381 + "classes = ['attack','normal']\n",
     382 + "plt.figure()\n",
     383 + "plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n",
     384 + "plt.title('Confusion matrix')\n",
     385 + "plt.colorbar()\n",
     386 + "tick_marks = np.arange(len(classes))\n",
     387 + "plt.xticks(tick_marks, classes, rotation=45)\n",
     388 + "plt.yticks(tick_marks, classes)\n",
     389 + "print(cm)\n",
     390 + "thresh = cm.max() / 2.\n",
     391 + "for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
     392 + " plt.text(j, i, cm[i, j].round(4),\n",
     393 + " horizontalalignment=\"center\",\n",
     394 + " color=\"white\" if cm[i, j] > thresh else \"black\")\n",
     395 + "\n",
     396 + "plt.tight_layout()\n",
     397 + "plt.ylabel('True label')\n",
     398 + "plt.xlabel('Predicted label')"
     399 + ]
     400 + }
     401 + ],
     402 + "metadata": {
     403 + "kernelspec": {
     404 + "display_name": "Python 3",
     405 + "language": "python",
     406 + "name": "python3"
     407 + },
     408 + "language_info": {
     409 + "codemirror_mode": {
     410 + "name": "ipython",
     411 + "version": 3
     412 + },
     413 + "file_extension": ".py",
     414 + "mimetype": "text/x-python",
     415 + "name": "python",
     416 + "nbconvert_exporter": "python",
     417 + "pygments_lexer": "ipython3",
     418 + "version": "3.7.3"
     419 + }
     420 + },
     421 + "nbformat": 4,
     422 + "nbformat_minor": 2
     423 +}
     424 + 
Please wait...
Page is in error, reload to recover