STRLCPY/5G_security_malicious_traffic_detection

Adding jupyter notebook for ASD microservice
karalis committed 4 years ago

35082997

1 parent ca2df20c

Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)

Total 1 files

■ ■ ■ ■ ■ ■

ASD_dnn.ipynb

1	+	{
2	+	"cells": [
3	+	{
4	+	"cell_type": "code",
5	+	"execution_count": 6,
6	+	"metadata": {},
7	+	"outputs": [],
8	+	"source": [
9	+	"import pandas as pd\n",
10	+	"import numpy as np\n",
11	+	"import matplotlib"
12	+	]
13	+	},
14	+	{
15	+	"cell_type": "code",
16	+	"execution_count": 8,
17	+	"metadata": {},
18	+	"outputs": [],
19	+	"source": [
20	+	"import matplotlib.pyplot as plt\n",
21	+	"from sklearn.pipeline import Pipeline\n",
22	+	"from sklearn.preprocessing import MinMaxScaler\n",
23	+	"from sklearn.preprocessing import StandardScaler\n",
24	+	"from sklearn.preprocessing import OrdinalEncoder\n",
25	+	"from sklearn.impute import SimpleImputer\n",
26	+	"from sklearn.preprocessing import OneHotEncoder\n",
27	+	"from sklearn.compose import ColumnTransformer\n"
28	+	]
29	+	},
30	+	{
31	+	"cell_type": "code",
32	+	"execution_count": 9,
33	+	"metadata": {},
34	+	"outputs": [],
35	+	"source": [
36	+	"attack_types = dict(  # NSL-KDD connection label -> traffic category\n",
37	+	"    normal='normal',\n",
38	+	"    # labels grouped under 'DoS'\n",
39	+	"    back='DoS',\n",
40	+	"    land='DoS',\n",
41	+	"    neptune='DoS',\n",
42	+	"    pod='DoS',\n",
43	+	"    smurf='DoS',\n",
44	+	"    teardrop='DoS',\n",
45	+	"    mailbomb='DoS',\n",
46	+	"    apache2='DoS',\n",
47	+	"    processtable='DoS',\n",
48	+	"    udpstorm='DoS',\n",
49	+	"    # labels grouped under 'Probe'\n",
50	+	"    ipsweep='Probe',\n",
51	+	"    nmap='Probe',\n",
52	+	"    portsweep='Probe',\n",
53	+	"    satan='Probe',\n",
54	+	"    mscan='Probe',\n",
55	+	"    saint='Probe',\n",
56	+	"    # labels grouped under 'R2L'\n",
57	+	"    ftp_write='R2L',\n",
58	+	"    guess_passwd='R2L',\n",
59	+	"    imap='R2L',\n",
60	+	"    multihop='R2L',\n",
61	+	"    phf='R2L',\n",
62	+	"    spy='R2L',\n",
63	+	"    warezclient='R2L',\n",
64	+	"    warezmaster='R2L',\n",
65	+	"    sendmail='R2L',\n",
66	+	"    named='R2L',\n",
67	+	"    snmpgetattack='R2L',\n",
68	+	"    snmpguess='R2L',\n",
69	+	"    xlock='R2L',\n",
70	+	"    xsnoop='R2L',\n",
71	+	"    worm='R2L',\n",
72	+	"    # labels grouped under 'U2R'\n",
73	+	"    buffer_overflow='U2R',\n",
74	+	"    loadmodule='U2R',\n",
75	+	"    perl='U2R',\n",
76	+	"    rootkit='U2R',\n",
77	+	"    httptunnel='U2R',\n",
78	+	"    ps='U2R',\n",
79	+	"    sqlattack='U2R',\n",
80	+	"    xterm='U2R',\n",
81	+	")\n"
82	+	]
83	+	},
84	+	{
85	+	"cell_type": "code",
86	+	"execution_count": 10,
87	+	"metadata": {},
88	+	"outputs": [],
89	+	"source": [
90	+	"is_attack = dict(  # traffic category -> binary class name\n",
91	+	"    DoS='attack',\n",
92	+	"    R2L='attack',\n",
93	+	"    U2R='attack',\n",
94	+	"    Probe='attack',\n",
95	+	"    normal='normal',\n",
96	+	")"
97	+	]
98	+	},
99	+	{
100	+	"cell_type": "code",
101	+	"execution_count": 11,
102	+	"metadata": {},
103	+	"outputs": [],
104	+	"source": [
105	+	"# The NSL-KDD text files are read from, and the exported CSV is written to, the same directory.\n",
106	+	"kdd_path_pkl = kdd_path = \"NSL_KDD/\""
107	+	]
108	+	},
109	+	{
110	+	"cell_type": "code",
111	+	"execution_count": 12,
112	+	"metadata": {},
113	+	"outputs": [],
114	+	"source": [
115	+	"class read_data:\n",
116	+	"    # Namespace class: the body runs once, at definition time, reads the NSL-KDD\n",
117	+	"    # train/test files and exposes the frames and label series as class attributes.\n",
118	+	"\n",
119	+	"    col_names = [\"duration\",\"protocol_type\",\"service\",\"flag\",\"src_bytes\",\n",
120	+	"        \"dst_bytes\",\"land\",\"wrong_fragment\",\"urgent\",\"hot\",\"num_failed_logins\",\n",
121	+	"        \"logged_in\",\"num_compromised\",\"root_shell\",\"su_attempted\",\"num_root\",\n",
122	+	"        \"num_file_creations\",\"num_shells\",\"num_access_files\",\"num_outbound_cmds\",\n",
123	+	"        \"is_host_login\",\"is_guest_login\",\"count\",\"srv_count\",\"serror_rate\",\n",
124	+	"        \"srv_serror_rate\",\"rerror_rate\",\"srv_rerror_rate\",\"same_srv_rate\",\n",
125	+	"        \"diff_srv_rate\",\"srv_diff_host_rate\",\"dst_host_count\",\"dst_host_srv_count\",\n",
126	+	"        \"dst_host_same_srv_rate\",\"dst_host_diff_srv_rate\",\"dst_host_same_src_port_rate\",\n",
127	+	"        \"dst_host_srv_diff_host_rate\",\"dst_host_serror_rate\",\"dst_host_srv_serror_rate\",\n",
128	+	"        \"dst_host_rerror_rate\",\"dst_host_srv_rerror_rate\",\"label\", \"difficulty_level\"]\n",
129	+	"\n",
130	+	"    # Column names are supplied explicitly through `names=`.\n",
131	+	"    KDDTrain = pd.read_csv(kdd_path + \"KDDTrain+.txt\", names=col_names)\n",
132	+	"    KDDTest = pd.read_csv(kdd_path + \"KDDTest+.txt\", names=col_names)\n",
133	+	"    KDDTrain_len = KDDTrain.shape[0]\n",
134	+	"    KDDTest_len = KDDTest.shape[0]\n",
135	+	"\n",
136	+	"    # Train rows come first, so positional slices by KDDTrain_len recover the split later.\n",
137	+	"    KDDAll = pd.concat([KDDTrain, KDDTest])\n",
138	+	"\n",
139	+	"    # Keep the difficulty scores aside before removing the column from the combined frame.\n",
140	+	"    kdd_diff_level_all = KDDAll[\"difficulty_level\"].copy()\n",
141	+	"    kdd_diff_level_train = KDDTrain[\"difficulty_level\"].copy()\n",
142	+	"    kdd_diff_level_test = KDDTest[\"difficulty_level\"].copy()\n",
143	+	"    KDDAll = KDDAll.drop(\"difficulty_level\", axis=1)\n",
144	+	"\n",
145	+	"    # Export the combined frame. The previous code wrote KDDTrain here, which did not\n",
146	+	"    # match the \"KDDAll+.csv\" file name.\n",
147	+	"    KDDAll.to_csv(kdd_path_pkl + \"KDDAll+.csv\")\n",
148	+	"\n",
149	+	"    # Add the category (\"type\") and binary (\"isa\") targets; an unknown label raises KeyError.\n",
150	+	"    for _frame in (KDDAll, KDDTrain, KDDTest):\n",
151	+	"        _frame[\"type\"] = _frame.label.map(lambda x: attack_types[x])\n",
152	+	"        _frame[\"isa\"] = _frame.type.map(lambda x: is_attack[x])\n",
153	+	"    del _frame  # do not leave the loop variable behind as a class attribute\n",
154	+	"\n",
155	+	"    # Grouped views of the combined frame, by category and by binary class.\n",
156	+	"    kdd_attack_type_group = KDDAll.groupby(\"type\")\n",
157	+	"    kdd_is_attack_group = KDDAll.groupby(\"isa\")\n",
158	+	"\n",
159	+	"    # Feature frames and targets for the binary (\"is\") and the multi-class (\"type\") task.\n",
160	+	"    # NOTE(review): KDDAll_is still carries the label-derived \"type\" column (and KDDAll_type still carries \"isa\"); do not feed those columns to a model.\n",
161	+	"    KDDAll_is_y = KDDAll[\"isa\"].copy()\n",
162	+	"    KDDAll_is = KDDAll.drop([\"label\", \"isa\"], axis=1)\n",
163	+	"    KDDAll_type_y = KDDAll[\"type\"].copy()\n",
164	+	"    KDDAll_type = KDDAll.drop([\"label\", \"type\"], axis=1)\n",
165	+	"\n",
166	+	"    KDDTrain_is_y = KDDTrain[\"isa\"].copy()\n",
167	+	"    KDDTrain_type_y = KDDTrain[\"type\"].copy()\n",
168	+	"\n",
169	+	"    KDDTest_is_y = KDDTest[\"isa\"].copy()\n",
170	+	"    KDDTest_type_y = KDDTest[\"type\"].copy()\n",
171	+	"\n",
172	+	"    # Integer targets for the binary classifier: attack -> 0, normal -> 1.\n",
173	+	"    class_mapping = {'attack': 0, 'normal': 1}\n",
174	+	"    Y_Train = KDDTrain_is_y.map(class_mapping)\n",
175	+	"    Y_Test = KDDTest_is_y.map(class_mapping)\n"
176	+	]
177	+	},
178	+	{
179	+	"cell_type": "code",
180	+	"execution_count": 14,
181	+	"metadata": {},
182	+	"outputs": [],
183	+	"source": [
184	+	"class preprocess_data:\n",
185	+	"    # Scales the numeric columns, one-hot encodes the categorical ones, then slices the result back into train/test rows.\n",
186	+	"    # \"type\" is derived from the label, so it is excluded here: encoding it as a feature leaks the target into the model input.\n",
187	+	"    col_names_onehot = [\"protocol_type\",\"service\",\"flag\"]\n",
188	+	"    col_names_onehot_s = list(col_names_onehot)\n",
189	+	"    KDDAll_num = read_data.KDDAll_is.drop(col_names_onehot + [\"type\"], axis=1) #pd\n",
190	+	"    KDDAll_onehot_s = read_data.KDDAll_is[col_names_onehot_s] #pd\n",
191	+	"\n",
192	+	"    num_pipeline = Pipeline([('scaling', StandardScaler())])\n",
193	+	"    cat_string_pipeline = Pipeline([('imputer', SimpleImputer(strategy = \"constant\", fill_value = \"missing\")), ('ordi', OrdinalEncoder()), ('onehots', OneHotEncoder(categories='auto'))])\n",
194	+	"\n",
195	+	"    num_attribs = list(KDDAll_num)\n",
196	+	"    cat_s_attribs = list(KDDAll_onehot_s)\n",
197	+	"\n",
198	+	"    # Columns not listed in either group (here: \"type\") are not passed through, because ColumnTransformer defaults to remainder='drop'.\n",
199	+	"    full_pipeline = ColumnTransformer([(\"num\", num_pipeline, num_attribs), (\"cats\", cat_string_pipeline, cat_s_attribs)])\n",
200	+	"    KDDAll_t = full_pipeline.fit_transform(read_data.KDDAll_is)  # fitted on train+test rows together\n",
201	+	"    X_Train = KDDAll_t[:read_data.KDDTrain_len]  # train rows come first in KDDAll\n",
202	+	"    X_Test = KDDAll_t[read_data.KDDTrain_len:read_data.KDDTrain_len + read_data.KDDTest_len]\n"
203	+	]
204	+	},
205	+	{
206	+	"cell_type": "code",
207	+	"execution_count": 15,
208	+	"metadata": {},
209	+	"outputs": [],
210	+	"source": [
211	+	"import tensorflow as tf\n",
212	+	"from tensorflow.keras.layers import Dense\n",
213	+	"from tensorflow.keras import optimizers\n",
214	+	"from tensorflow.keras import models\n",
215	+	"from tensorflow.keras import layers\n",
216	+	"from tensorflow.keras import wrappers\n",
217	+	"from tensorflow.keras import initializers\n",
218	+	"from tensorflow.keras import regularizers\n",
219	+	"from tensorflow.keras import losses\n",
220	+	"from scipy.stats import reciprocal\n",
221	+	"from sklearn.model_selection import RandomizedSearchCV\n",
222	+	"from tensorflow.keras.wrappers.scikit_learn import KerasClassifier\n",
223	+	"from sklearn.metrics import classification_report\n",
224	+	"import time"
225	+	]
226	+	},
227	+	{
228	+	"cell_type": "code",
229	+	"execution_count": 16,
230	+	"metadata": {},
231	+	"outputs": [],
232	+	"source": [
233	+	"features_dim = preprocess_data.X_Train.shape[1]  # column count of the transformed training matrix; build_model uses it as the input width"
234	+	]
235	+	},
236	+	{
237	+	"cell_type": "code",
238	+	"execution_count": 19,
239	+	"metadata": {},
240	+	"outputs": [],
241	+	"source": [
242	+	"def build_model(learning_rate,mt,regrt, lay1, lay2, lay3, initiali):\n",
243	+	"    \"\"\"Build and compile the binary DNN: three ReLU hidden layers (lay1/lay2/lay3 units), each followed by Dropout(0.2), and a sigmoid output.\n",
244	+	"    `initiali` is the kernel initializer of the first layer; `mt` and `regrt` are accepted from the search grid but are not used by the network.\"\"\"\n",
245	+	"    model = models.Sequential([\n",
246	+	"        layers.Dense(units=lay1, input_shape=(features_dim,), activation=\"relu\", kernel_initializer=initiali, bias_initializer='zeros'),\n",
247	+	"        layers.Dropout(0.2), layers.Dense(units=lay2, activation=\"relu\"),\n",
248	+	"        layers.Dropout(0.2), layers.Dense(units=lay3, activation=\"relu\"),\n",
249	+	"        layers.Dropout(0.2), layers.Dense(1, activation=\"sigmoid\")])\n",
250	+	"    # Pass the optimizer instance: the string 'RMSprop' builds a default optimizer and silently ignores `learning_rate`.\n",
251	+	"    optRMS = optimizers.RMSprop(learning_rate=learning_rate)\n",
252	+	"    model.compile(loss='binary_crossentropy', optimizer=optRMS)\n",
253	+	"    return model\n"
254	+	]
255	+	},
256	+	{
257	+	"cell_type": "code",
258	+	"execution_count": 20,
259	+	"metadata": {},
260	+	"outputs": [],
261	+	"source": [
262	+	"keras_reg = wrappers.scikit_learn.KerasClassifier(build_model)\n",
263	+	"\n",
264	+	"# Search space sampled by RandomizedSearchCV; the keys match build_model's parameter names.\n",
265	+	"param_distribs = {\"learning_rate\": reciprocal(0.0001, 0.0005), \"mt\": reciprocal(0.9, 0.94), \"regrt\": [0.001, 0.01, 0.1],\n",
266	+	"                  \"lay1\": [256, 128], \"lay2\": [64, 32], \"lay3\": [32, 16], \"initiali\": ['glorot_uniform', 'he_uniform']}\n",
267	+	"\n",
268	+	"# error_score=0: a failed fit must rank last. The former value 1 equals a perfect F1 and let crashing configurations win.\n",
269	+	"rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, cv=5, scoring='f1_macro', n_jobs=-1, error_score=0)\n"
270	+	]
271	+	},
272	+	{
273	+	"cell_type": "code",
274	+	"execution_count": 21,
275	+	"metadata": {},
276	+	"outputs": [],
277	+	"source": [
278	+	"# Notebook-level aliases for the transformed feature matrices ...\n",
279	+	"X_Train, X_Test = preprocess_data.X_Train, preprocess_data.X_Test\n",
280	+	"\n",
281	+	"# ... and for the integer-encoded binary targets (attack -> 0, normal -> 1).\n",
282	+	"Y_Train, Y_Test = read_data.Y_Train, read_data.Y_Test"
283	+	]
284	+	},
285	+	{
286	+	"cell_type": "code",
287	+	"execution_count": 22,
288	+	"metadata": {},
289	+	"outputs": [],
290	+	"source": [
291	+	"# Values passed to the search's fit call as batch_size, epochs and verbose:\n",
292	+	"# batch size, number of epochs, verbosity level.\n",
293	+	"batch_s, epoches, ver = 2000, 75, 2"
294	+	]
295	+	},
296	+	{
297	+	"cell_type": "code",
298	+	"execution_count": null,
299	+	"metadata": {},
300	+	"outputs": [],
301	+	"source": [
302	+	"rnd_search_cv.fit(X_Train, Y_Train, batch_size=batch_s, epochs=epoches, verbose=ver)  # run the randomized search (cv=5) on the training split"
303	+	]
304	+	},
305	+	{
306	+	"cell_type": "markdown",
307	+	"metadata": {},
308	+	"source": [
309	+	"# ASD on MEC: Model is already pre-trained"
310	+	]
311	+	},
312	+	{
313	+	"cell_type": "markdown",
314	+	"metadata": {},
315	+	"source": [
316	+	"- Model: fully connected neural network with dropout 0.2, RMSprop optimizer and binary cross-entropy loss\n",
317	+	"- Epochs: 75\n",
318	+	"- Batch size: 2000 connections per step"
319	+	]
320	+	},
321	+	{
322	+	"cell_type": "markdown",
323	+	"metadata": {},
324	+	"source": [
325	+	"## Start Prediction"
326	+	]
327	+	},
328	+	{
329	+	"cell_type": "code",
330	+	"execution_count": null,
331	+	"metadata": {},
332	+	"outputs": [],
333	+	"source": [
334	+	"pred_test = rnd_search_cv.predict(X_Test)  # predictions of the fitted search object for the test rows"
335	+	]
336	+	},
337	+	{
338	+	"cell_type": "markdown",
339	+	"metadata": {},
340	+	"source": [
341	+	"Classification report ready"
342	+	]
343	+	},
344	+	{
345	+	"cell_type": "code",
346	+	"execution_count": null,
347	+	"metadata": {},
348	+	"outputs": [],
349	+	"source": [
350	+	"print(\"Classification report: \\n\", classification_report(Y_Test, pred_test))  # classes: 0 = attack, 1 = normal"
351	+	]
352	+	},
353	+	{
354	+	"cell_type": "code",
355	+	"execution_count": null,
356	+	"metadata": {},
357	+	"outputs": [],
358	+	"source": [
359	+	"# Print the search's best estimator, best score, best parameters and refit time.\n",
360	+	"for _heading, _attr in ((\"Best estimator: \\n\", \"best_estimator_\"), (\"Best score: \\n\", \"best_score_\"),\n",
361	+	"                        (\"Best params: \\n\", \"best_params_\"), (\"Refit time: \\n\", \"refit_time_\")):\n",
362	+	"    print(_heading, getattr(rnd_search_cv, _attr))"
363	+	]
364	+	},
365	+	{
366	+	"cell_type": "markdown",
367	+	"metadata": {},
368	+	"source": [
369	+	"Plot Confusion Matrix Graph"
370	+	]
371	+	},
372	+	{
373	+	"cell_type": "code",
374	+	"execution_count": null,
375	+	"metadata": {},
376	+	"outputs": [],
377	+	"source": [
378	+	"from sklearn.metrics import confusion_matrix\n",
379	+	"import itertools\n",
380	+	"# Class order follows read_data.class_mapping: 0 -> attack, 1 -> normal.\n",
381	+	"classes = ['attack','normal']\n",
382	+	"# Fix the label order so the matrix is always 2x2 and lines up with the tick labels,\n",
383	+	"# even when one class is absent from the data.\n",
384	+	"cm = confusion_matrix(Y_Test, pred_test, labels=[0, 1])\n",
385	+	"plt.figure()\n",
386	+	"plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n",
387	+	"plt.title('Confusion matrix')\n",
388	+	"plt.colorbar()\n",
389	+	"tick_marks = np.arange(len(classes))\n",
390	+	"plt.xticks(tick_marks, classes, rotation=45)\n",
391	+	"plt.yticks(tick_marks, classes)\n",
392	+	"print(cm)\n",
393	+	"thresh = cm.max() / 2.  # cells above half the maximum get white text for contrast\n",
394	+	"for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
395	+	"    plt.text(j, i, cm[i, j].round(4), horizontalalignment=\"center\", color=\"white\" if cm[i, j] > thresh else \"black\")\n",
396	+	"plt.ylabel('True label')\n",
397	+	"plt.xlabel('Predicted label')\n",
398	+	"plt.tight_layout()  # called last so the axis labels are taken into account"
399	+	]
400	+	}
401	+	],
402	+	"metadata": {
403	+	"kernelspec": {
404	+	"display_name": "Python 3",
405	+	"language": "python",
406	+	"name": "python3"
407	+	},
408	+	"language_info": {
409	+	"codemirror_mode": {
410	+	"name": "ipython",
411	+	"version": 3
412	+	},
413	+	"file_extension": ".py",
414	+	"mimetype": "text/x-python",
415	+	"name": "python",
416	+	"nbconvert_exporter": "python",
417	+	"pygments_lexer": "ipython3",
418	+	"version": "3.7.3"
419	+	}
420	+	},
421	+	"nbformat": 4,
422	+	"nbformat_minor": 2
423	+	}
424	+

Adding jupyter notebook for ASD microservice