Projects STRLCPY LogonTracer Commits 97e0dc4e
🤬
  • Fixed a performance issue when importing EVTX files

    Changed EVTX parsing Python module from python-evtx to evtx
  • Loading...
  • shusei tomonaga committed 4 years ago
    97e0dc4e
    1 parent 75ec302d
  • ■ ■ ■ ■ ■ ■
    logontracer.py
    skipped 20 lines
    21 21   has_lxml = False
    22 22   
    23 23  try:
    24  - from Evtx.Evtx import Evtx
    25  - from Evtx.Views import evtx_file_xml_view
     24 + from evtx import PyEvtxParser
    26 25   has_evtx = True
    27 26  except ImportError:
    28 27   has_evtx = False
    skipped 311 lines
    340 339   
    341 340   if os.path.exists(UPLOAD_DIR) is False:
    342 341   os.mkdir(UPLOAD_DIR)
    343  - print("[+] make upload folder %s." % UPLOAD_DIR)
     342 + print("[+] make upload folder {0}.".format(UPLOAD_DIR))
    344 343   
    345 344   try:
    346 345   timezone = request.form["timezone"]
    skipped 220 lines
    567 566   
    568 567  def to_lxml(record_xml):
    569 568   rep_xml = record_xml.replace("xmlns=\"http://schemas.microsoft.com/win/2004/08/events/event\"", "")
    570  - set_xml = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?>%s" % rep_xml
    571  - fin_xml = set_xml.encode("utf-8")
     569 + fin_xml = rep_xml.encode("utf-8")
    572 570   parser = etree.XMLParser(resolve_entities=False)
    573 571   return etree.fromstring(fin_xml, parser)
    574 572   
    575 573   
    576 574  def xml_records(filename):
    577 575   if args.evtx:
    578  - with Evtx(filename) as evtx:
    579  - for xml, record in evtx_file_xml_view(evtx.get_file_header()):
     576 + with open(filename, "rb") as evtx:
     577 + parser = PyEvtxParser(evtx)
     578 + for record in parser.records():
    580 579   try:
    581  - yield to_lxml(xml), None
     580 + yield to_lxml(record["data"]), None
    582 581   except etree.XMLSyntaxError as e:
    583  - yield xml, e
     582 + yield record["data"], e
    584 583   
    585 584   if args.xmls:
    586 585   xdata = ""
    skipped 6 lines
    593 592   for xml in xml_list:
    594 593   if xml.startswith("<System>"):
    595 594   try:
    596  - yield to_lxml("<Event>" + xml), None
     595 + yield to_lxml("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\" ?><Event>" + xml), None
    597 596   except etree.XMLSyntaxError as e:
    598 597   yield xml, e
    599 598   
    skipped 69 lines
    669 668   
    670 669   if os.path.exists(cache_dir) is False:
    671 670   os.mkdir(cache_dir)
    672  - print("[+] make cache folder %s." % cache_dir)
     671 + print("[+] make cache folder {0}.".format(cache_dir))
    673 672   
    674 673   if args.timezone:
    675 674   try:
    676 675   datetime.timezone(datetime.timedelta(hours=args.timezone))
    677 676   tzone = args.timezone
    678  - print("[+] Time zone is %s." % args.timezone)
     677 + print("[+] Time zone is {0}.".format(args.timezone))
    679 678   except:
    680  - sys.exit("[!] Can't load time zone '%s'." % args.timezone)
     679 + sys.exit("[!] Can't load time zone {0}.".format(args.timezone))
    681 680   else:
    682 681   tzone = 0
    683 682   
    684 683   if args.fromdate:
    685 684   try:
    686 685   fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
    687  - print("[+] Parse the EVTX from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
     686 + print("[+] Parse the EVTX from {0}.".format(fdatetime.strftime("%Y-%m-%d %H:%M:%S")))
    688 687   except:
    689 688   sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    690 689   
    691 690   if args.todate:
    692 691   try:
    693 692   tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
    694  - print("[+] Parse the EVTX from %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
     693 + print("[+] Parse the EVTX from {0}.".format(tdatetime.strftime("%Y-%m-%d %H:%M:%S")))
    695 694   except:
    696 695   sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    697 696   
    skipped 4 lines
    702 701   if fb_data != EVTX_HEADER:
    703 702   sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
    704 703   
    705  - chunk = -2
    706  - with Evtx(evtx_file) as evtx:
    707  - fh = evtx.get_file_header()
    708  - try:
    709  - while True:
    710  - last_chunk = list(evtx.chunks())[chunk]
    711  - last_record = last_chunk.file_last_record_number()
    712  - chunk -= 1
    713  - if last_record > 0:
    714  - record_sum = record_sum + last_record
    715  - break
    716  - except:
    717  - record_sum = record_sum + fh.next_record_number()
     704 + with open(evtx_file, "rb") as evtx:
     705 + parser = PyEvtxParser(evtx)
     706 + records = list(parser.records())
     707 + record_sum = len(records)
    718 708   
    719 709   if args.xmls:
    720 710   with open(evtx_file, "r") as fb:
    skipped 3 lines
    724 714   for line in fb:
    725 715   record_sum += line.count("<System>")
    726 716   
    727  - print("[+] Last record number is %i." % record_sum)
     717 + print("[+] Last record number is {0}.".format(record_sum))
    728 718   
    729 719   # Parse Event log
    730 720   print("[+] Start parsing the EVTX file.")
    731 721   
    732 722   for evtx_file in evtx_list:
    733  - print("[+] Parse the EVTX file %s." % evtx_file)
     723 + print("[+] Parse the EVTX file {0}.".format(evtx_file))
    734 724   
    735 725   for node, err in xml_records(evtx_file):
    736 726   if err is not None:
    skipped 2 lines
    739 729   eventid = int(node.xpath("/Event/System/EventID")[0].text)
    740 730   
    741 731   if not count % 100:
    742  - sys.stdout.write("\r[+] Now loading %i records." % count)
     732 + sys.stdout.write("\r[+] Now loading {0} records.".format(count))
    743 733   sys.stdout.flush()
    744 734   
    745 735   if eventid in EVENT_ID:
    skipped 244 lines
    990 980   deletelog.append("-")
    991 981   
    992 982   print("\n[+] Load finished.")
    993  - print("[+] Total Event log is %i." % count)
     983 + print("[+] Total Event log is {0}.".format(count))
    994 984   
    995 985   if not username_set or not len(event_set):
    996 986   sys.exit("[!] This event log did not include logs to be visualized. Please check the details of the event log.")
    997 987   else:
    998  - print("[+] Fildered Event log is %i." % len(event_set))
     988 + print("[+] Fildered Event log is {0}.".format(len(event_set)))
    999 989   
    1000 990   tohours = int((endtime - starttime).total_seconds() / 3600)
    1001 991   
    skipped 197 lines
    1199 1189   try:
    1200 1190   datetime.timezone(datetime.timedelta(hours=args.timezone))
    1201 1191   tzone = args.timezone
    1202  - print("[+] Time zone is %s." % args.timezone)
     1192 + print("[+] Time zone is {0}.".format(args.timezone))
    1203 1193   except:
    1204  - sys.exit("[!] Can't load time zone '%s'." % args.timezone)
     1194 + sys.exit("[!] Can't load time zone {0}.".format(args.timezone))
    1205 1195   else:
    1206 1196   tzone = 0
    1207 1197   
    1208 1198   if args.fromdate:
    1209 1199   try:
    1210 1200   fdatetime = datetime.datetime.strptime(args.fromdate, "%Y%m%d%H%M%S")
    1211  - print("[+] Search ES from %s." % fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
     1201 + print("[+] Search ES from {0}.".format(fdatetime.strftime("%Y-%m-%d %H:%M:%S")))
    1212 1202   except:
    1213 1203   sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")
    1214 1204   
    1215 1205   if args.todate:
    1216 1206   try:
    1217 1207   tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
    1218  - print("[+] Search ES to %s." % tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
     1208 + print("[+] Search ES to {0}.".format(tdatetime.strftime("%Y-%m-%d %H:%M:%S")))
    1219 1209   except:
    1220 1210   sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")
    1221 1211   # Parse Event log
    skipped 50 lines
    1272 1262   eventid = event.event_id
    1273 1263   
    1274 1264   if not count % 100:
    1275  - sys.stdout.write("\r[+] Now loading %i records." % count)
     1265 + sys.stdout.write("\r[+] Now loading {0} records.".format(count))
    1276 1266   sys.stdout.flush()
    1277 1267   
    1278 1268   if eventid in EVENT_ID:
    skipped 220 lines
    1499 1489   deletelog.append("-")
    1500 1490   
    1501 1491   print("\n[+] Load finished.")
    1502  - print("[+] Total Event log is %i." % count)
     1492 + print("[+] Total Event log is {0}.".format(count))
    1503 1493   
    1504 1494   if not username_set or not len(event_set):
    1505 1495   sys.exit("[!] This event log did not include logs to be visualized. Please check the details of the event log.")
    1506 1496   else:
    1507  - print("[+] Fildered Event log is %i." % len(event_set))
     1497 + print("[+] Fildered Event log is {0}.".format(len(event_set)))
    1508 1498   
    1509 1499   tohours = int((endtime - starttime).total_seconds() / 3600)
    1510 1500   
    skipped 130 lines
    1641 1631   sys.exit("[!] py2neo must be installed for this script.")
    1642 1632   
    1643 1633   if not has_evtx:
    1644  - sys.exit("[!] python-evtx must be installed for this script.")
     1634 + sys.exit("[!] evtx must be installed for this script.")
    1645 1635   
    1646 1636   if not has_lxml:
    1647 1637   sys.exit("[!] lxml must be installed for this script.")
    skipped 23 lines
    1671 1661   except:
    1672 1662   sys.exit("[!] Can't connect Neo4j Database.")
    1673 1663   
    1674  - print("[+] Script start. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
     1664 + print("[+] Script start. {0}".format(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
    1675 1665   
    1676 1666   try:
    1677 1667   print("[+] Neo4j Kernel version: {0}".format(".".join(map(str, db.kernel_start_time))))
    skipped 14 lines
    1692 1682   cache_dir = os.path.join(FPATH, 'cache')
    1693 1683   if os.path.exists(cache_dir):
    1694 1684   shutil.rmtree(cache_dir)
    1695  - print("[+] Delete cache folder %s." % cache_dir)
     1685 + print("[+] Delete cache folder {0}.".format(cache_dir))
    1696 1686   
    1697 1687   if args.evtx:
    1698 1688   for evtx_file in args.evtx:
    skipped 10 lines
    1709 1699   if args.es:
    1710 1700   parse_es()
    1711 1701   
    1712  - print("[+] Script end. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
     1702 + print("[+] Script end. {0}".format(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
    1713 1703   
    1714 1704   
    1715 1705  if __name__ == "__main__":
    skipped 2 lines
  • ■ ■ ■ ■
    requirements.txt
    1 1  numpy
    2 2  py2neo
    3  -python-evtx
     3 +evtx
    4 4  lxml
    5 5  scipy==1.2.1
    6 6  changefinder
    skipped 5 lines
Please wait...
Page is in error, reload to recover