Projects STRLCPY Maryam Commits 616a293a
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexing completes)
  • ■ ■ ■ ■ ■
    README.md | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    skipped 41 lines
    42 42   
    43 43  # Latest Updates
    44 44   
     45 + - Added image_search module
    45 46   - Clustering, Meta Search Engine, Dark-Web Search
    46 47   - Iris: the first beta version
    47 48   - Added famous_person
    48 49   - Core speedup optimizations
    49 50   - Added setup.py
    50  - - Web API: web command
     51 + 
    51 52   
    52 53   
    53 54   
    skipped 21 lines
  • maryam/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/__main__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/basedir.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/core.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/initial.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/util/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/bing_images.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +from lxml import html
     16 +from json import loads
     17 + 
     18 +class main:
     19 + def __init__(self, q, limit=3, count=28):
     20 + """ bing.com search engine
     21 + q : Query for search
     22 + limit : Number of pages
     23 + count : Number of results
     24 + """
     25 + self.framework = main.framework
     26 + self.q = q
     27 + self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0'
     28 + self.url = 'https://www.bing.com/images/search'
     29 + self._pages = ''
     30 + self.count = 28 if count > 28 else count
     31 + self.limit = 10 if limit > 10 else limit
     32 + 
     33 + def run_crawl(self):
     34 + set_page = lambda x: (x*self.count)+1
     35 + payload = {'count': self.count, 'first': 1, 'form': 'IBASEP', 'q': self.q}
     36 + max_attempt = 0
     37 + for i in range(self.limit):
     38 + self.framework.verbose(f"[BING Image] Searching in {i} page...", end='\r')
     39 + try:
     40 + req = self.framework.request(
     41 + url=self.url,
     42 + params=payload,
     43 + headers={'user-agent': self.agent},
     44 + allow_redirects=True)
     45 + except Exception as e:
     46 + self.framework.error(f"ConnectionError: {e}", 'util/engines/bing_images', 'run_crawl')
     47 + max_attempt += 1
     48 + if max_attempt == self.limit:
     49 + self.framework.error('Bing is missed!', 'util/engines/bing_images', 'run_crawl')
     50 + break
     51 + else:
     52 + self._pages += req.text
     53 + payload['first'] = set_page(i+1)
     54 + 
     55 + @property
     56 + def results(self):
     57 + items = []
     58 + tree = html.fromstring(self._pages)
     59 + for item in tree.xpath('//div[@class="imgpt"]'):
     60 + try:
     61 + info = item.xpath('.//div[@class="img_info hon"]')[0].xpath("./span[@class='nowrap']")[0].text
     62 + m = loads(item.xpath('./a/@m')[0])
     63 + title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
     64 + items.append({'i': m['murl'],
     65 + 't': title,
     66 + 'a': m['purl'],
     67 + 'd': info
     68 + })
     69 + except:
     70 + continue
     71 + return items
     72 + 
     73 + @property
     74 + def pages(self):
     75 + return self._pages
     76 + 
     77 + @property
     78 + def links(self):
     79 + return [x['a'] for x in self.results]
     80 + 
     81 + 
  • ■ ■ ■ ■
    maryam/core/util/engines/google.py
    skipped 24 lines
    25 25   self.q = q
    26 26   self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'
    27 27   self.xpath_name_original = {
    28  - 'results': '//div[@class="g"]|//div[@class="g tF2Cxc"]|//div[@class="g Ww4FFb tF2Cxc"]',
     28 + 'results': '//div[@class="MjjYud"]|//div[@class="g"]|//div[@class="g tF2Cxc"]|//div[@class="g Ww4FFb tF2Cxc"]',
    29 29   'results_content': './/div[@data-content-feature="1"]|.//div[@class="VwiC3b yXK7lf MUxGbd yDYNvb lyLwlc lEBKkf"]',
    30 30   'results_title': './/h3[1]',
    31 31   'results_a': './/div[@class="yuRUbf"]/a',
    skipped 171 lines
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/google_images.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +from lxml import html
     16 + 
     17 + 
class main:
    def __init__(self, q):
        """ google.com image search engine
        q : Query for search
        """
        # framework is injected onto the class by the loader before instantiation
        self.framework = main.framework
        self.q = q
        self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'
        # XPaths tied to Google's current image-results markup; they break
        # whenever Google rotates its generated class names
        self.xpath_name_original = {
            'results': '//div[@class="isv-r PNCib MSM1fd BUooTd"]',
            'results_content': './/img',
            'results_title': './/h3',
            'results_a': './/a',
            'results_cite': './/div[@class="dmeZbb"]'
        }
        # Grouped form: result-container XPath -> the four sub-XPaths
        self.xpath_original = {
            self.xpath_name_original['results']: [
                self.xpath_name_original['results_content'],
                self.xpath_name_original['results_title'],
                self.xpath_name_original['results_a'],
                self.xpath_name_original['results_cite']
            ]
        }
        self.url = 'https://www.google.com/search'
        # Raw HTML accumulated by run_crawl
        self._pages = ''

    def run_crawl(self):
        # 'tbm': 'isch' selects Google image search; 'filter': '0' disables
        # the omitted-duplicates filter
        payload = {'ie': 'utf-8', 'oe': 'utf-8', 'q': self.q, 'filter': '0', 'tbm': 'isch'}
        self.framework.verbose(f"[GOOGLE Images] Searching...", end='\r')
        try:
            req = self.framework.request(
                url=self.url,
                params=payload,
                headers={'user-agent': self.agent},
                allow_redirects=True)
        except Exception as e:
            # Best-effort: report and leave self._pages unchanged
            self.framework.error(f"ConnectionError: {e}", 'util/google_images', 'run_crawl')
            self.framework.error('Google is missed!', 'util/google_images', 'run_crawl')
        else:
            # 503/429 are Google's rate-limit/CAPTCHA responses
            if req.status_code in (503, 429):
                self.framework.error('Google CAPTCHA triggered.', 'util/google_images', 'run_crawl')
                return
            # Follow a single explicit redirect manually (e.g. country-domain hop)
            if req.status_code in (301, 302):
                redirect = req.headers['location']
                req = self.framework.request(url=redirect, allow_redirects=False)

            self._pages += req.text

    @property
    def results(self):
        """Parse self._pages into a list of dicts with keys
        'i' (image url), 't' (title), 'a' (page url), 'd' (cite/description)."""
        tree = html.fromstring(self._pages)
        output = []
        results = []
        results = tree.xpath(self.xpath_name_original['results'])
        for i in results:
            a = i.xpath(self.xpath_name_original['results_content'])
            b = i.xpath(self.xpath_name_original['results_title'])
            c = i.xpath(self.xpath_name_original['results_a'])
            d = i.xpath(self.xpath_name_original['results_cite'])
            roler = {}
            # Only emit a result when all four parts were found
            if all([a, b, c, d]):
                # a[0].values()[0] is the first attribute of the <img> tag —
                # presumably src; relies on attribute order — TODO confirm
                if 'https://' not in a[0].values()[0]:
                    continue
                roler['i'] = a[0].values()[0]
                roler['t'] = b[0].text_content()
                # c[1]: the second <a> inside the tile carries the page link —
                # NOTE(review): depends on Google's markup order; verify
                roler['a'] = c[1].get('href')
                roler['d'] = d[0].text_content()
                output.append(roler)
        return output

    @property
    def pages(self):
        # Raw concatenated HTML of the fetched page(s)
        return self._pages

    @property
    def links(self):
        """Page URLs of all parsed results."""
        links = [x['a'] for x in self.results]
        return links

    @property
    def dns(self):
        # Delegates host extraction to the framework's page parser
        return self.framework.page_parse(self._pages).get_dns(self.q, self.links)

    @property
    def emails(self):
        return self.framework.page_parse(self._pages).get_emails(self.q)

    @property
    def docs(self):
        return self.framework.page_parse(self._pages).get_docs(self.q, self.links)
     108 + 
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/qwant.py
    skipped 14 lines
    15 15  along with this program. If not, see <http://www.gnu.org/licenses/>.
    16 16  """
    17 17   
    18  -import re
    19 18   
    20 19  class main:
    21 20   
    skipped 21 lines
    43 42   try:
    44 43   req = self.framework.request(url=self.qwant, params=payload)
    45 44   except Exception as e:
    46  - self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant', 'name_crawl')
    47  - self.framework.error('Qwant is missed!', 'util/engines/qwant', 'name_crawl')
     45 + self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant', 'util/engines/qwant', 'run_crawl')
     46 + self.framework.error('Qwant is missed!', 'util/engines/qwant', 'util/engines/qwant', 'run_crawl')
    48 47   break
    49 48   else:
    50 49   if req.status_code == 429 and "I can't let you do that..." in req.text and '<div class="error-code">' in req.text:
    51  - self.framework.error('429 Too Many Requests')
     50 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    52 51   return
    53 52   self._pages += req.text
    54 53   try:
    55 54   self._json.append(req.json())
    56 55   except Exception as e:
    57  - self.framework.error('429 Too Many Requests')
     56 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    58 57   return
    59 58   else:
    60 59   if req.json() == {'status': 'error', 'data': {'error_code': 22}}:
    61  - self.framework.error('429 Too Many Requests')
     60 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    62 61   return
    63 62   else:
    64 63   if page == self.limit:
    skipped 53 lines
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/qwant_images.py
     1 +"""
     2 +OWASP Maryam!
     3 + 
     4 +This program is free software: you can redistribute it and/or modify
     5 +it under the terms of the GNU General Public License as published by
     6 +the Free Software Foundation, either version 3 of the License, or
     7 +any later version.
     8 + 
     9 +This program is distributed in the hope that it will be useful,
     10 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     12 +GNU General Public License for more details.
     13 + 
     14 +You should have received a copy of the GNU General Public License
     15 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     16 +"""
     17 + 
     18 + 
     19 +class main:
     20 + 
     21 + def __init__(self, q, limit=1, count=50):
     22 + """ qwant.com image search engine
     23 + 
     24 + q : Query for search
     25 + limit : Number of pages
     26 + count : Number of results
     27 + """
     28 + self.framework = main.framework
     29 + self.q = q
     30 + self.limit = 10 if limit > 10 else limit
     31 + self.count = 50 if count > 50 else count
     32 + self._pages = ''
     33 + self._json = []
     34 + self.qwant = 'https://api.qwant.com/v3/search/images'
     35 + 
     36 + def run_crawl(self):
     37 + page = 1
     38 + set_page = lambda x: (x-1)*10
     39 + payload = {'t': 'images', 'q': self.q, 'offset': set_page(page), 'count': '10', \
     40 + 'safesearch': '0', 'device': 'desktop', 'locale': 'en_GB', 'device': 'desktop'}
     41 + while True:
     42 + self.framework.verbose(f"[QWANT] Searching in {page+1} page...")
     43 + try:
     44 + req = self.framework.request(url=self.qwant, params=payload)
     45 + except Exception as e:
     46 + self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant_images', 'run_crawl')
     47 + self.framework.error('Qwant is missed!', 'util/engines/qwant_images', 'run_crawl')
     48 + break
     49 + else:
     50 + if req.status_code == 429 and "I can't let you do that..." in req.text and '<div class="error-code">' in req.text:
     51 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     52 + return
     53 + self._pages += req.text
     54 + try:
     55 + self._json.append(req.json())
     56 + except Exception as e:
     57 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     58 + return
     59 + else:
     60 + if req.json() == {'status': 'error', 'data': {'error_code': 22}}:
     61 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     62 + return
     63 + else:
     64 + if page == self.limit:
     65 + break
     66 + page += 1
     67 + payload['offset'] = set_page(page)
     68 + 
     69 + @property
     70 + def pages(self):
     71 + return self._pages
     72 + 
     73 + @property
     74 + def json(self):
     75 + return self._json
     76 + 
     77 + @property
     78 + def results(self):
     79 + results = []
     80 + for page in self._json:
     81 + items = page.get('data', {}).get('result', {})
     82 + if items:
     83 + items = items.get('items', [])
     84 + for item in items:
     85 + results.append({
     86 + "a": item.get("url"),
     87 + "i": item.get("media"),
     88 + "t": item.get("title"),
     89 + "d": f"{item.get('width')}*{item.get('height')} {item.get('size')}B"}
     90 + )
     91 + 
     92 + return results
     93 + 
     94 + @property
     95 + def dns(self):
     96 + return self.framework.page_parse(self._pages).get_dns(self.q, self.links)
     97 + 
     98 + @property
     99 + def emails(self):
     100 + return self.framework.page_parse(self._pages).get_emails(self.q)
     101 + 
     102 + @property
     103 + def docs(self):
     104 + return self.framework.page_parse(self._pages).get_docs(self.q, self.links)
     105 + 
     106 + 
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/iris/meta_search_util.py
    skipped 39 lines
    40 40   cite = f"{host}{path}"
    41 41   return cite
    42 42   
    43  - def simple_merge(results):
     43 + def remove_dups(self, res):
     44 + urls = []
     45 + new = []
     46 + for i in res:
     47 + a = self.urlib(i['a'].lower()).sub_service()
     48 + if a not in urls:
     49 + urls.append(a)
     50 + new.append(i)
     51 + return new
     52 + 
     53 + def simple_merge(results) -> 'merging results based on quality of engines':
    44 54   engines_len = len(results)
    45 55   merged = []
    46 56   
    47 57   for i in range(len(min(results, key=len))):
    48 58   for e in range(engines_len):
    49 59   merged.append(results[e%engines_len].pop(0))
     60 + 
    50 61   
    51 62   for i in results:
    52 63   for j in i:
    skipped 4 lines
    57 68   def compute_count_consensus(
    58 69   e: dict(type=list, help='list of search engines sorted by quality'),
    59 70   l: dict(type=int, help='number of results')) -> 'a list of numbers':
    60  - x=len(e)
    61  - o={}
     71 + x = len(e)
     72 + o = {}
    62 73   for i in e:
    63  - o[i]=trunc(l/x)
    64  - l-=l-(l%x)
    65  - if l!=0:
    66  - if l<x:
     74 + o[i] = trunc(l/x)
     75 + l -= l - (l%x)
     76 + if l != 0:
     77 + if l < x:
    67 78   for i in range(l):
    68  - o[e[i]]+=1
     79 + o[e[i]] += 1
    69 80   return o
    70 81   
  • maryam/modules/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/cluster.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■
    maryam/modules/iris/iris.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    skipped 18 lines
    19 19   'author': 'Saeed, Kaushik',
    20 20   'version': '0.4',
    21 21   'description': 'Iris is a built-in meta search engine.',
    22  - 'comments': ('It should be note that this is a beta version and has lots of bugs!',),
     22 + 'comments': ('It should be note that this is a beta version and has many bugs!',),
    23 23   'contributors': 'Aman, Dimitris, Divya, Vikas, Kunal',
    24 24   'sources': ('google', 'bing', 'duckduckgo', 'millionshort', 'etools'),
    25 25   'options': (
    skipped 11 lines
    37 37   for _ in concurrent.futures.as_completed(futures):
    38 38   pass
    39 39   
    40  -def remove_dups(self, res):
    41  - urls = []
    42  - new = []
    43  - for i in res:
    44  - a = self.urlib(i['a'].lower()).sub_service()
    45  - if a not in urls:
    46  - urls.append(a)
    47  - new.append(i)
    48  - return new
    49  - 
    50 40  def search(self, name, q, limit):
    51 41   global RESULTS
    52 42   count = MAPPED[name]
    skipped 18 lines
    71 61   engines = MAPPED.keys()
    72 62   thread(self, search, query, 3, engines)
    73 63   simple_merge = self.meta_search_util.simple_merge([RESULTS[x] for x in engines if x in RESULTS])
    74  - final_results = remove_dups(self, simple_merge)
     64 + # TODO: Removing duplicates should be done during merging
     65 + final_results = self.meta_search_util.remove_dups(self, simple_merge)
    75 66   output = {'results': final_results}
    76 67   self.save_gather(output, 'iris/iris', query, output=self.options['output'])
    77 68   return output
    skipped 4 lines
  • maryam/modules/iris/iris_cluster.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/sentiment.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/topicmodeling.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■ ■
    maryam/modules/osint/image_search.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +meta = {
     16 + 'name': 'Image Search',
     17 + 'author': 'Saeed',
     18 + 'version': '0.1',
     19 + 'description': 'Search in open-sources to find images.',
     20 + 'sources': ('bing', 'google', 'qwant'),
     21 + 'options': (
     22 + ('query', None, True, 'Query, host Name, company Name, keyword, , etc', '-q', 'store', str),
     23 + ('engines', 'google', True, 'Search engines with comma separator', '-e', 'store', str),
     24 + ('thread', 3, False, 'The number of engine that run per round(default=2)', '-t', 'store', int),
     25 + ),
     26 + 'examples': ('image_search -q amazon -e google,bing,qwant',)
     27 +}
     28 +
     29 +IMGS = []
     30 +def remove_dups(self):
     31 + urls = []
     32 + new = []
     33 + for i in IMGS:
     34 + a = self.urlib(i['i'].lower()).sub_service()
     35 + if a not in urls:
     36 + urls.append(a)
     37 + new.append(i)
     38 + return new
     39 + 
     40 +def search(self, name, q):
     41 + global IMGS
     42 + try:
     43 + engine = getattr(self, name + '_images')
     44 + q = q
     45 + varnames = engine.__init__.__code__.co_varnames
     46 + attr = engine(q)
     47 + 
     48 + attr.run_crawl()
     49 + IMGS += attr.results
     50 + except Exception as e:
     51 + print(e)
     52 + 
     53 +def module_api(self):
     54 + query = self.options['query']
     55 + engines = self.options['engines'].lower().split(',')
     56 + 
     57 + self.thread(search, self.options['thread'], engines, query, meta['sources'])
     58 + INGS = remove_dups(self)
     59 + output = {'results' : IMGS}
     60 + self.save_gather(output, 'osint/image_search', query, output=self.options['output'])
     61 + return output
     62 + 
     63 +def module_run(self):
     64 + for i in module_api(self)['results']:
     65 + self.output(i['a'])
     66 + self.output(i['i'])
     67 + self.output(i['t'])
     68 + self.output(i['d'])
     69 + print()
     70 + 
  • maryam.py | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    Content is identical
  • requirements | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    Content is identical
Please wait...
Page is in error, reload to recover