STRLCPY/recollapse

Initial version
0xacb committed 1 year ago

57dca84a

■ ■ ■ ■ ■ ■

.gitignore

1	+	# Byte-compiled / optimized / DLL files
2	+	__pycache__/
3	+	*.py[cod]
4	+	*$py.class
5	+
6	+	# C extensions
7	+	*.so
8	+
9	+	# Distribution / packaging
10	+	.Python
11	+	build/
12	+	develop-eggs/
13	+	dist/
14	+	downloads/
15	+	eggs/
16	+	.eggs/
17	+	lib/
18	+	lib64/
19	+	parts/
20	+	sdist/
21	+	var/
22	+	wheels/
23	+	pip-wheel-metadata/
24	+	share/python-wheels/
25	+	*.egg-info/
26	+	.installed.cfg
27	+	*.egg
28	+	MANIFEST
29	+
30	+	# PyInstaller
31	+	# Usually these files are written by a python script from a template
32	+	# before PyInstaller builds the exe, so as to inject date/other infos into it.
33	+	*.manifest
34	+	*.spec
35	+
36	+	# Installer logs
37	+	pip-log.txt
38	+	pip-delete-this-directory.txt
39	+
40	+	# Unit test / coverage reports
41	+	htmlcov/
42	+	.tox/
43	+	.nox/
44	+	.coverage
45	+	.coverage.*
46	+	.cache
47	+	nosetests.xml
48	+	coverage.xml
49	+	*.cover
50	+	*.py,cover
51	+	.hypothesis/
52	+	.pytest_cache/
53	+
54	+	# Translations
55	+	*.mo
56	+	*.pot
57	+
58	+	# Django stuff:
59	+	*.log
60	+	local_settings.py
61	+	db.sqlite3
62	+	db.sqlite3-journal
63	+
64	+	# Flask stuff:
65	+	instance/
66	+	.webassets-cache
67	+
68	+	# Scrapy stuff:
69	+	.scrapy
70	+
71	+	# Sphinx documentation
72	+	docs/_build/
73	+
74	+	# PyBuilder
75	+	target/
76	+
77	+	# Jupyter Notebook
78	+	.ipynb_checkpoints
79	+
80	+	# IPython
81	+	profile_default/
82	+	ipython_config.py
83	+
84	+	# pyenv
85	+	.python-version
86	+
87	+	# pipenv
88	+	# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89	+	# However, in case of collaboration, if having platform-specific dependencies or dependencies
90	+	# having no cross-platform support, pipenv may install dependencies that don't work, or not
91	+	# install all needed dependencies.
92	+	#Pipfile.lock
93	+
94	+	# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95	+	__pypackages__/
96	+
97	+	# Celery stuff
98	+	celerybeat-schedule
99	+	celerybeat.pid
100	+
101	+	# SageMath parsed files
102	+	*.sage.py
103	+
104	+	# Environments
105	+	.env
106	+	.venv
107	+	env/
108	+	venv/
109	+	ENV/
110	+	env.bak/
111	+	venv.bak/
112	+
113	+	# Spyder project settings
114	+	.spyderproject
115	+	.spyproject
116	+
117	+	# Rope project settings
118	+	.ropeproject
119	+
120	+	# mkdocs documentation
121	+	/site
122	+
123	+	# mypy
124	+	.mypy_cache/
125	+	.dmypy.json
126	+	dmypy.json
127	+
128	+	# Pyre type checker
129	+	.pyre/
130	+

■ ■ ■ ■ ■ ■

Dockerfile

# Multi-stage build for the recollapse CLI.
#
# "base" is the common Python image; "build" installs the Python dependencies
# into an isolated prefix; the final stage copies only that prefix and the
# script itself.

# Keyword casing is kept consistent (FROM ... AS ...) to satisfy Docker's
# FromAsCasing build check.
FROM python:3.7-alpine AS base

FROM base AS build

WORKDIR /build

COPY requirements.txt /requirements.txt
# Install into /build so the result can be copied wholesale into the final image.
RUN pip install --prefix /build -r /requirements.txt

FROM base

# Merge the installed packages into the interpreter's default prefix.
COPY --from=build /build /usr/local

WORKDIR /tool

COPY recollapse /tool
# The script is run directly as the entrypoint, so it must be executable.
RUN chmod +x recollapse

ENTRYPOINT ["/tool/recollapse"]
20	+

■ ■ ■ ■ ■ ■

LICENSE

1	+	MIT License
2	+
3	+	Copyright (c) 2022 André Baptista
4	+
5	+	Permission is hereby granted, free of charge, to any person obtaining a copy
6	+	of this software and associated documentation files (the "Software"), to deal
7	+	in the Software without restriction, including without limitation the rights
8	+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9	+	copies of the Software, and to permit persons to whom the Software is
10	+	furnished to do so, subject to the following conditions:
11	+
12	+	The above copyright notice and this permission notice shall be included in all
13	+	copies or substantial portions of the Software.
14	+
15	+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18	+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20	+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21	+	SOFTWARE.
22	+

■ ■ ■ ■ ■ ■

README.md

1	+	# REcollapse
2	+
3	+	REcollapse is a helper tool for black-box regex fuzzing to bypass validations and discover normalizations in web applications.
4	+
5	+	It can also be helpful to bypass WAFs and weak vulnerability mitigations. For more information, take a look at the [REcollapse slides](https://github.com/0xacb/recollapse/blob/main/till_recollapse_fuzzing_the_web_for_mysterious_bugs.pdf).
6	+
7	+	The goal of this tool is to generate payloads for testing. Actual fuzzing should be done with other tools like [Burp](https://portswigger.net/burp) (intruder), [ffuf](https://github.com/ffuf/ffuf), or similar.
8	+
9	+	---------------
10	+
11	+	### Installation
12	+
13	+	Requirements: Python 3
14	+
15	+	`pip3 install --user --upgrade -r requirements.txt` or `./install.sh`
16	+
17	+	Docker
18	+
19	+	`docker build -t recollapse .`
20	+
21	+	---------------
22	+
23	+	### Usage
24	+	```
25	+	$ recollapse -h
26	+	usage: recollapse [-h] [-p POSITIONS] [-e {1,2,3}] [-r RANGE] [-s SIZE] [-f FILE]
27	+	[-an] [-mn MAXNORM] [-nt]
28	+	[input]
29	+
30	+	REcollapse is a helper tool for black-box regex fuzzing to bypass validations and
31	+	discover normalizations in web applications
32	+
33	+	positional arguments:
34	+	input original input
35	+
36	+	options:
37	+	-h, --help show this help message and exit
38	+	-p POSITIONS, --positions POSITIONS
39	+	pivot position modes. Example: 1,2,3,4 (default). 1: starting,
40	+	2: separator, 3: normalization, 4: termination
41	+	-e {1,2,3}, --encoding {1,2,3}
42	+	1: URL-encoded format (default), 2: Unicode format, 3: Raw
43	+	format
44	+	-r RANGE, --range RANGE
45	+	range of bytes for fuzzing. Example: 0,0xff (default)
46	+	-s SIZE, --size SIZE number of fuzzing bytes (default: 1)
47	+	-f FILE, --file FILE read input from file
48	+	-an, --alphanum include alphanumeric bytes in fuzzing range
49	+	-mn MAXNORM, --maxnorm MAXNORM
50	+	maximum number of normalizations (default: 3)
51	+	-nt, --normtable print normalization table
52	+	```
53	+
54	+	---------------
55	+
56	+	### Detailed options explanation
57	+
58	+	Let's consider `this_is.an_example` as the input.
59	+
60	+	Positions
61	+
62	+	1. Fuzz the beginning of the input: `$this_is.an_example`
63	+	2. Fuzz before and after special characters: `this$_$is$.$an$_$example`
64	+	3. Fuzz normalization positions: replace all possible bytes according to the [normalization table](https://0xacb.com/normalization_table)
65	+	4. Fuzz the end of the input: `this_is.an_example$`
66	+
67	+	Encoding
68	+
69	+	1. URL-encoded format to be used with `application/x-www-form-urlencoded` or query parameters: `%22this_is.an_example`
70	+	2. Unicode format to be used with `application/json`: `\u0022this_is.an_example`
71	+	3. Raw format to be used with `multipart/form-data`: `"this_is.an_example`
72	+
73	+	Range
74	+
75	+	Specify a range of bytes for fuzzing: `-r 1-127`. This will exclude alphanumeric characters unless the `-an` option is provided.
76	+
77	+	Size
78	+
79	+	Specify the size of fuzzing for positions `1`, `2` and `4`. The default approach is to fuzz all possible values for one byte. Increasing the size will consume more resources and generate many more inputs, but it can lead to finding new bypasses.
80	+
81	+	File
82	+
83	+	Input can be provided as a positional argument, stdin, or a file through the `-f` option.
84	+
85	+	Alphanumeric
86	+
87	+	By default, alphanumeric characters are excluded from the fuzzing range, since they are usually not interesting in terms of responses. You can include them with the `-an` option.
88	+
89	+	Maximum number of normalizations
90	+
91	+	Not all normalization libraries have the same behavior. By default, three possibilities for normalizations are generated for each input index, which is usually enough. Use the `-mn` option to go further.
92	+
93	+	Normalization table
94	+
95	+	Use the `-nt` option to show the normalization table.
96	+
97	+	---------------
98	+
99	+	### Example
100	+
101	+	```bash
102	+	$ recollapse -p 1,2,4 -r 10-11 https://legit.example.com
103	+	%0ahttps://legit.example.com
104	+	%0bhttps://legit.example.com
105	+	https%0a://legit.example.com
106	+	https%0b://legit.example.com
107	+	https:%0a//legit.example.com
108	+	https:%0b//legit.example.com
109	+	https:/%0a/legit.example.com
110	+	https:/%0b/legit.example.com
111	+	https://%0alegit.example.com
112	+	https://%0blegit.example.com
113	+	https://legit%0a.example.com
114	+	https://legit%0b.example.com
115	+	https://legit.%0aexample.com
116	+	https://legit.%0bexample.com
117	+	https://legit.example%0a.com
118	+	https://legit.example%0b.com
119	+	https://legit.example.%0acom
120	+	https://legit.example.%0bcom
121	+	https://legit.example.com%0a
122	+	https://legit.example.com%0b
123	+	```
124	+
125	+	---------------
126	+
127	+	### Resources
128	+
129	+	This technique was presented at [BSidesLisbon 2022](https://bsideslisbon.org/).
130	+
131	+	Slides: [till_recollapse_fuzzing_the_web_for_mysterious_bugs.pdf](https://github.com/0xacb/recollapse/blob/main/till_recollapse_fuzzing_the_web_for_mysterious_bugs.pdf)
132	+
133	+	Normalization table: https://0xacb.com/normalization_table
134	+
135	+	---------------
136	+
137	+	Thanks
138	+
139	+	- [@regala_](https://twitter.com/regala_)
140	+	- [@0xz3z4d45](https://twitter.com/0xz3z4d45)
141	+	- [@jllis](https://twitter.com/jllis)
142	+	- [@samwcyo](https://twitter.com/samwcyo)
143	+	- [@yassineaboukir](https://twitter.com/yassineaboukir)
144	+	- [@0xteknogeek](https://twitter.com/0xteknogeek)
145	+	- BBAC
146	+
147	+	---------------
148	+
149	+	### ⚠ Legal Disclaimer ⚠
150	+
151	+	This project is made for educational and ethical testing purposes only. Usage of this tool for attacking targets without prior mutual consent is illegal. Developers assume no liability and are not responsible for any misuse or damage caused by this tool.
152	+

■ ■ ■ ■ ■ ■

install.sh

#!/bin/bash
# Install the Python dependencies and copy the recollapse script into
# /usr/local/bin so it can be invoked as `recollapse`.

# Abort on the first failing command (and on unset variables / failed pipes)
# so a failed dependency install does not still copy the tool.
set -euo pipefail

# requirements.txt and recollapse are referenced by relative path; resolve
# them against this script's directory so the installer works from any cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

pip3 install --upgrade -r requirements.txt
# install(1) sets the mode explicitly; a plain cp would keep the source mode,
# which may lack the executable bit.
sudo install -m 0755 recollapse /usr/local/bin/
5 +

All occurrences

■ ■ ■ ■ ■ ■

recollapse

1	+	#!/usr/bin/env python3
2	+
3	+	import argparse
4	+	import sys
5	+	import string
6	+	import unidecode
7	+	import warnings
8	+	from prettytable import PrettyTable
9	+	import urllib.parse
10	+	import itertools
11	+
12	+	warnings.simplefilter("ignore")
13	+
14	+
class Recollapse:
    """Generate fuzzing payloads by inserting or substituting bytes in an input string.

    Pivot position modes (members of ``positions``):
        MODE_START (1): insert fuzz bytes before the first character.
        MODE_SEP   (2): insert fuzz bytes before and after every character
                        that is in ``string.punctuation``.
        MODE_NORM  (3): replace a character with code points that
                        ``unidecode`` maps back to that character.
        MODE_TERM  (4): insert fuzz bytes after the last character.

    Output encodings (``encoding``):
        ENCODING_URL (1), ENCODING_UNICODE (2), ENCODING_RAW (3);
        see ``generate`` for the exact formats.
    """

    ENCODING_URL = 1
    ENCODING_UNICODE = 2
    ENCODING_RAW = 3

    MODE_START = 1
    MODE_SEP = 2
    MODE_NORM = 3
    MODE_TERM = 4

    # Class-level defaults are kept so the attribute names still exist on the
    # class; __init__ rebinds both per instance so state is never shared.
    output = []
    normalization_d = {}

    def __init__(self, size, encoding, range, positions, input, file, normtable, alphanum, maxnorm):
        """Store the options and build the normalization dictionary.

        Args:
            size: number of fuzz bytes per insertion (``repeat`` of the product).
            encoding: one of the ENCODING_* constants.
            range: two-item sequence ``[low, high]``; ``high`` is inclusive.
            positions: iterable of MODE_* constants to run.
            input: the original input string, or a falsy value to read it
                from ``file`` / stdin in ``run``.
            file: path to read the input from when ``input`` is falsy.
            normtable: when truthy, ``run`` only prints the normalization table.
            alphanum: when falsy, ASCII letters and digits are removed from
                the fuzzing range.
            maxnorm: maximum number of normalization candidates per character.
        """
        # Fresh containers per instance: with the class-level lists/dicts,
        # payloads generated by one instance leaked into every other one.
        self.output = []
        self.normalization_d = {}
        self.build_normalization_dict()
        self.size = size
        self.encoding = encoding
        self.range = range
        self.positions = positions
        self.input = input
        self.file = file
        self.normtable = normtable
        self.alphanum = alphanum
        self.maxnorm = maxnorm

    def run(self):
        """Generate payloads for every selected mode and print them.

        Payloads are de-duplicated, sorted, and printed one per line. When
        ``normtable`` is set, only the normalization table is printed.
        """
        if self.normtable:
            self.print_normalization_table()
            return

        if not self.input:
            if self.file:
                with open(self.file) as f:
                    self.input = f.read()
            else:
                self.input = sys.stdin.read().rstrip()

        # The upper bound of the configured range is inclusive.
        fuzzing_range = range(self.range[0], self.range[1] + 1)
        if not self.alphanum:
            alphanum_ascii = set(map(ord, string.ascii_letters + string.digits))
            fuzzing_range = [b for b in fuzzing_range if b not in alphanum_ascii]

        if self.MODE_START in self.positions:
            for t in itertools.product(fuzzing_range, repeat=self.size):
                self.generate(t, 0)

        if self.MODE_SEP in self.positions:
            for i, c in enumerate(self.input):
                if c in string.punctuation:
                    for t in itertools.product(fuzzing_range, repeat=self.size):
                        # Insert both before and after the separator.
                        self.generate(t, i)
                        self.generate(t, i + 1)

        if self.MODE_NORM in self.positions:
            for i, c in enumerate(self.input):
                if c in self.normalization_d:
                    for cc in self.normalization_d.get(c)[0:self.maxnorm]:
                        self.generate((ord(cc),), i, replace=True)

        if self.MODE_TERM in self.positions:
            for t in itertools.product(fuzzing_range, repeat=self.size):
                self.generate(t, len(self.input))

        print("\n".join(sorted(set(self.output))))

    def build_normalization_dict(self):
        """Map each printable ASCII character to the code points unidecode folds onto it.

        Keys are the characters 0x20..0x7e. Each value lists the code points
        in ``range(0xffff)`` whose ``unidecode`` result is exactly that single
        character (the character itself is excluded).
        """
        charset = [chr(c) for c in range(0x20, 0x7f)]
        for c in charset:
            self.normalization_d[c] = []

        # Set lookup avoids a linear scan of the charset for each code point.
        charset_lookup = set(charset)
        for code in range(0xffff):
            original = chr(code)
            norm_c = unidecode.unidecode(original)
            if len(norm_c) == 1 and norm_c in charset_lookup and norm_c != original:
                self.normalization_d[norm_c].append(original)

    def print_normalization_table(self):
        """Print the normalization dictionary as a borderless, headerless table.

        Each row is: hex code of the ASCII character, the character, then its
        normalization candidates, padded with empty cells to a common width.
        """
        table = []
        max_col = max(len(candidates) for candidates in self.normalization_d.values())

        for c in self.normalization_d:
            row = self.normalization_d.get(c)
            row = row + [""] * (max_col - len(row))
            table.append([hex(ord(c)), c] + row)

        tab = PrettyTable()
        tab.header = False
        tab.border = False
        tab.add_rows(table)
        print(tab)

    def generate(self, bytes, index, replace=False):
        """Append one payload to ``self.output``.

        Args:
            bytes: iterable of integer code points to encode and place at ``index``.
            index: position in ``self.input`` where the encoded bytes go.
            replace: when True the character at ``index`` is dropped, so the
                encoded bytes replace it instead of being inserted before it.
        """
        s = self.input
        a = s[:index]
        b = s[index + 1:] if replace else s[index:]

        x = ""
        if self.encoding == self.ENCODING_URL:
            for byte in bytes:
                if byte > 0xff:
                    # Above one byte: percent-encode via urllib.parse.quote.
                    x += urllib.parse.quote(chr(byte))
                else:
                    x += "%{y}".format(y=hex(byte)[2:].zfill(2))
            self.output.append("{a}{x}{b}".format(x=x, a=a, b=b))
        elif self.encoding == self.ENCODING_RAW:
            for byte in bytes:
                # Values 10, 11, 12 and 27 are not emitted in raw mode.
                if 10 <= byte < 13 or byte == 27:
                    continue
                x += chr(byte)
            try:
                self.output.append("{a}{x}{b}".format(x=x, a=a, b=b))
            except UnicodeEncodeError:
                # Best-effort: a payload that cannot be built is dropped.
                pass
        elif self.encoding == self.ENCODING_UNICODE:
            for byte in bytes:
                x += "\\u{x}".format(x=hex(byte)[2:].zfill(4))
            self.output.append("{a}{x}{b}".format(x=x, a=a, b=b))
139	+
140	+
def parse_args():
    """Parse and validate the command-line options.

    Returns:
        argparse.Namespace whose ``range`` is a two-item list of ints
        ``[low, high]``, ``positions`` is a list of ints in 1..4 and ``size``
        is an int; the remaining attributes are as produced by argparse.

    Exits with status 1 (after printing a message) when no arguments are
    given or when ``--range``, ``--positions`` or ``--size`` cannot be parsed.
    """
    parser = argparse.ArgumentParser(description="REcollapse is a helper tool for black-box regex fuzzing to bypass validations and discover normalizations in web applications")

    parser.add_argument("-p", "--positions", help="pivot position modes. Example: 1,2,3,4 (default). 1: starting, 2: separator, 3: normalization, 4: termination", required=False, default="1,2,3,4", type=str)
    parser.add_argument("-e", "--encoding", help="1: URL-encoded format (default), 2: Unicode format, 3: Raw format", required=False, default=1, type=int, choices=range(1, 4))
    parser.add_argument("-r", "--range", help="range of bytes for fuzzing. Example: 0,0xff (default)", required=False, default="0,0xff", type=str)
    parser.add_argument("-s", "--size", help="number of fuzzing bytes (default: 1)", required=False, default=1)
    parser.add_argument("-f", "--file", help="read input from file", required=False)
    parser.add_argument("-an", "--alphanum", help="include alphanumeric bytes in fuzzing range", required=False, default=False, action="store_true")
    parser.add_argument("-mn", "--maxnorm", help="maximum number of normalizations (default: 3)", default=3, type=int)
    parser.add_argument("-nt", "--normtable", help="print normalization table", required=False, default=False, action="store_true")
    parser.add_argument("input", help="original input", nargs="?")

    args = parser.parse_args()

    # Invoked with no arguments at all: show usage instead of waiting on stdin.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    if args.range:
        # Any "0x" in the value switches every bound to hexadecimal parsing;
        # bounds may be separated by "," or "-".
        base = 16 if "0x" in args.range else 0
        sep = "-" if "-" in args.range else ","
        try:
            args.range = [int(x, base) for x in args.range.split(sep)]
        except ValueError:
            print("Invalid range provided")
            sys.exit(1)
        if len(args.range) == 1:
            # The upper bound is treated as inclusive downstream, so a single
            # value N must become [N, N]; [N, N+1] would fuzz two bytes.
            args.range.append(args.range[0])

    if args.positions:
        try:
            args.positions = [int(x) for x in args.positions.split(",")]
        except ValueError:
            print("Invalid positions provided")
            sys.exit(1)

        for p in args.positions:
            if not 0 < p < 5:
                print("Invalid positions provided")
                sys.exit(1)

    try:
        args.size = int(args.size)
    except ValueError:
        print("Invalid size provided")
        sys.exit(1)

    return args
186	+
187	+
if __name__ == "__main__":
    # Script entry point: the parsed option names match the constructor's
    # parameters, so the namespace is expanded straight into keyword arguments.
    options = vars(parse_args())
    Recollapse(**options).run()
192	+

■ ■ ■ ■ ■ ■

requirements.txt

1 + unidecode
2 + prettytable

All occurrences
till_recollapse_fuzzing_the_web_for_mysterious_bugs.pdf

Binary file.

#!/bin/bash
# Install the Python dependencies and copy the recollapse script into
# /usr/local/bin so it can be invoked as `recollapse`.

# Abort on the first failing command (and on unset variables / failed pipes)
# so a failed dependency install does not still copy the tool.
set -euo pipefail

# requirements.txt and recollapse are referenced by relative path; resolve
# them against this script's directory so the installer works from any cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

pip3 install --upgrade -r requirements.txt
# install(1) sets the mode explicitly; a plain cp would keep the source mode,
# which may lack the executable bit.
sudo install -m 0755 recollapse /usr/local/bin/
5	+

Initial version