| skipped 410 lines |
411 | 411 | | |
412 | 412 | | |
413 | 413 | | # Calculate Hidden Markov Model |
414 | | - | def decodehmm(frame, hosts, users, stime): |
| 414 | + | def decodehmm(frame, users, stime): |
415 | 415 | | detect_hmm = [] |
416 | 416 | | model = joblib.load(FPATH + "/model/hmm.pkl") |
417 | 417 | | while(1): |
418 | 418 | | date = stime.strftime("%Y-%m-%d") |
419 | 419 | | for user in users: |
| 420 | + | hosts = np.unique(frame[(frame["user"] == user)].host.values) |
420 | 421 | | for host in hosts: |
421 | | - | udata = np.array([]) |
| 422 | + | udata = [] |
422 | 423 | | for _, data in frame[(frame["date"].str.contains(date)) & (frame["user"] == user) & (frame["host"] == host)].iterrows(): |
423 | 424 | | id = data["id"] |
424 | 425 | | if id == 4776: |
425 | | - | udata = np.append(udata, [0], axis=0) |
| 426 | + | udata.append(0) |
426 | 427 | | elif id == 4768: |
427 | | - | udata = np.append(udata, [1], axis=0) |
| 428 | + | udata.append(1) |
428 | 429 | | elif id == 4769: |
429 | | - | udata = np.append(udata, [2], axis=0) |
| 430 | + | udata.append(2) |
430 | 431 | | elif id == 4624: |
431 | | - | udata = np.append(udata, [3], axis=0) |
| 432 | + | udata.append(3) |
432 | 433 | | elif id == 4625: |
433 | | - | udata = np.append(udata, [4], axis=0) |
434 | | - | if udata.shape[0] > 2: |
435 | | - | data_decode = model.predict(np.array([udata], dtype="int").T) |
| 434 | + | udata.append(4) |
| 435 | + | if len(udata) > 2: |
| 436 | + | data_decode = model.predict(np.array([np.array(udata)], dtype="int").T) |
436 | 437 | | unique_data = np.unique(data_decode) |
437 | 438 | | if unique_data.shape[0] == 2: |
438 | 439 | | if user not in detect_hmm: |
| skipped 7 lines |
446 | 447 | | |
447 | 448 | | |
448 | 449 | | # Learning Hidden Markov Model |
449 | | - | def learnhmm(frame, hosts, users, stime): |
| 450 | + | def learnhmm(frame, users, stime): |
450 | 451 | | lengths = [] |
451 | 452 | | data_array = np.array([]) |
452 | 453 | | # start_probability = np.array([0.52, 0.37, 0.11]) |
| skipped 3 lines |
456 | 457 | | while(1): |
457 | 458 | | date = stime.strftime("%Y-%m-%d") |
458 | 459 | | for user in users: |
| 460 | + | hosts = np.unique(frame[(frame["user"] == user)].host.values) |
459 | 461 | | for host in hosts: |
460 | 462 | | udata = np.array([]) |
461 | 463 | | for _, data in frame[(frame["date"].str.contains(date)) & (frame["user"] == user) & (frame["host"] == host)].iterrows(): |
| skipped 371 lines |
833 | 835 | | ml_frame = ml_frame.sort_values(by="date") |
834 | 836 | | if args.learn: |
835 | 837 | | print("[*] Learning event logs using Hidden Markov Model.") |
836 | | - | learnhmm(ml_frame, event_set["ipaddress"].drop_duplicates(), username_set, datetime.datetime(*starttime.timetuple()[:3])) |
| 838 | + | learnhmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3])) |
837 | 839 | | |
838 | 840 | | # Calculate ChangeFinder |
839 | 841 | | print("[*] Calculate ChangeFinder.") |
| skipped 1 lines |
841 | 843 | | |
842 | 844 | | # Calculate Hidden Markov Model |
843 | 845 | | print("[*] Calculate Hidden Markov Model.") |
844 | | - | detect_hmm = decodehmm(ml_frame, event_set["ipaddress"].drop_duplicates(), username_set, datetime.datetime(*starttime.timetuple()[:3])) |
| 846 | + | detect_hmm = decodehmm(ml_frame, username_set, datetime.datetime(*starttime.timetuple()[:3])) |
845 | 847 | | |
846 | 848 | | # Calculate PageRank |
847 | 849 | | print("[*] Calculate PageRank.") |
| skipped 150 lines |