Projects / btcrecover / Commits / 33f7a029
  • prefer comparisons to bytestrings for user input

    * fixes a minor bug for #107 which caused an exit with a
      stack trace instead of the user-friendly --utf8 warning
    * silences unnecessary UnicodeWarnings
    * also improves the docs regarding special chars/escaping & comments
  • Christopher Gurnee committed 7 years ago
    33f7a029
    1 parent 4fab004f
  • TUTORIAL.md
    skipped 40 lines
    41 41   
    42 42  When used with these contents, *btcrecover* will try all possible combinations using one or more of these three tokens, e.g. `Hotel_california` (just one token), `BettlejuiceCairo` (two tokens pasted together), etc.
    43 43   
     44 +Note that lines which start with a `#` are ignored as comments, but only if the `#` is at the *very beginning* of the line:
     45 + 
     46 +     # This line is a comment, it's ignored.
     47 +     # The line at the bottom is not a comment because the
     48 +     # first character on the line is a space, and not a #
     49 +      #a_single_token_starting_with_the_#_symbol
     50 + 
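The rule above fits in one line of code; a minimal sketch mirroring the `line.startswith(b"#")` test this commit adds (not the actual parser, which additionally treats a first line beginning with `#--` as options):

```python
def is_comment(line):
    # Only a '#' in the very first column makes the line a comment;
    # any leading whitespace defeats it, as the tutorial's example shows.
    return line.startswith(b"#")

print(is_comment(b"# This line is a comment, it's ignored.\n"))
print(is_comment(b" #a_single_token_starting_with_the_#_symbol\n"))
```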
    44 51  ### Mutual Exclusion ###
    45 52   
    46 53  Maybe you’re not sure about how you spelled or capitalized one of those words. Take this token file:
    skipped 507 lines
  • btcrecover/__init__.py
    1 1  # for backwards compatibility
    2  -from btcrpass import *
     2 +from .btcrpass import *
    3 3   
  • btcrecover/btcrpass.py
    skipped 28 lines
    29 29  # (all optional futures for 2.7)
    30 30  from __future__ import print_function, absolute_import, division, unicode_literals
    31 31   
    32  -__version__ = "0.17.5"
     32 +__version__ = "0.17.6"
    33 33  __ordering_version__ = b"0.6.4" # must be updated whenever password ordering changes
    34 34   
    35 35  import sys, argparse, itertools, string, re, multiprocessing, signal, os, cPickle, gc, \
    skipped 2493 lines
    2529 2529          for c in s:
    2530 2530              if ord(c) > 127: # 2**7 - 1
    2531 2531                  error_exit(error_msg, "has character with code point", ord(c), "> max (127 / ASCII)\n"
    2532  -                               "(Unicode mode can be enabled with the --utf8 option)")
     2532 +                               "(see the Unicode Support section in the Tutorial and the --utf8 option)")
    2533 2533      else:
    2534 2534          # For Unicode mode, a REPLACEMENT CHARACTER indicates a failed conversion from UTF-8
    2535 2535          if no_replacement_chars and "\uFFFD" in s:
    skipped 4 lines
    2540 2540          for c in s:
    2541 2541              c = ord(c)
    2542 2542              if 0xD800 <= c <= 0xDBFF or 0xDC00 <= c <= 0xDFFF:
    2543  -                    error_exit(error_msg, "has character with code point > max ("+unicode(sys.maxunicode)+" / BMP)")
     2543 +                    error_exit(error_msg, "has character with code point > max ("+unicode(sys.maxunicode)+" / Unicode BMP)")
    2544 2544   
    2545 2545   
    2546 2546  # Returns an (order preserved) list or string with duplicate elements removed
    skipped 153 lines
    2700 2700          if size == 0: return tstr("")
    2701 2701          peeked = self._peeked
    2702 2702          self._peeked = b""
    2703  -        if peeked == "\n": return peeked # A blank Unix-style line (or OS X)
    2704  -        if peeked == "\r": # A blank Windows or MacOS line
     2703 +        if peeked == b"\n": return peeked # A blank Unix-style line (or OS X)
     2704 +        if peeked == b"\r": # A blank Windows or MacOS line
    2705 2705              if size == 1:
    2706 2706                  return peeked
    2707  -            if self.peek() == "\n":
     2707 +            if self.peek() == b"\n":
    2708 2708                  peeked = self._peeked
    2709 2709                  self._peeked = b""
    2710 2710                  return b"\r"+peeked # A blank Windows-style line
    skipped 303 lines
    3014 3014   
    3015 3015      # If we're not --restoring nor using a passwordlist, try to open the tokenlist_file now
    3016 3016      # (if we are restoring, we don't know what to open until after the restore data is loaded)
    3017  -    TOKENS_AUTO_FILENAME = "btcrecover-tokens-auto.txt"
     3017 +    TOKENS_AUTO_FILENAME = b"btcrecover-tokens-auto.txt"
    3018 3018      if not (args.restore or args.passwordlist or args.performance or base_iterator):
    3019 3019          tokenlist_file = open_or_use(args.tokenlist, "r", kwds.get("tokenlist"),
    3020 3020                                       default_filename=TOKENS_AUTO_FILENAME, permit_stdin=True, make_peekable=True)
    skipped 5 lines
    3026 3026      # If the first line of the tokenlist file starts with "#\s*--", parse it as additional arguments
    3027 3027      # (note that command line arguments can override arguments in this file)
    3028 3028      tokenlist_first_line_num = 1
    3029  -    if tokenlist_file and tokenlist_file.peek() == "#": # if it's either a comment or additional args
     3029 +    if tokenlist_file and tokenlist_file.peek() == b"#": # if it's either a comment or additional args
    3030 3030          first_line = tokenlist_file.readline()[1:].strip()
    3031 3031          tokenlist_first_line_num = 2 # need to pass this to parse_token_list
    3032  -        if first_line.startswith("--"): # if it's additional args, not just a comment
    3033  -            print("Read additional options from tokenlist file: "+first_line, file=sys.stderr)
     3032 +        if first_line.startswith(b"--"): # if it's additional args, not just a comment
     3033 +            print(b"Read additional options from tokenlist file: "+first_line, file=sys.stderr)
    3034 3034              tokenlist_args = first_line.split() # TODO: support quoting / escaping?
    3035 3035              effective_argv = tokenlist_args + effective_argv # prepend them so that real argv takes precedence
    3036 3036              args = parser.parse_args(effective_argv) # reparse the arguments
    3037 3037              # Check this again as early as possible so user doesn't miss any error messages
    3038 3038              if args.pause: enable_pause()
    3039 3039              for arg in tokenlist_args:
    3040  -                if arg.startswith("--to"): # --tokenlist
     3040 +                if arg.startswith(b"--to"): # --tokenlist
    3041 3041                      error_exit("the --tokenlist option is not permitted inside a tokenlist file")
    3042  -                elif arg.startswith("--pas"): # --passwordlist
     3042 +                elif arg.startswith(b"--pas"): # --passwordlist
    3043 3043                      error_exit("the --passwordlist option is not permitted inside a tokenlist file")
    3044  -                elif arg.startswith("--pe"): # --performance
     3044 +                elif arg.startswith(b"--pe"): # --performance
    3045 3045                      error_exit("the --performance option is not permitted inside a tokenlist file")
    3046  -                elif arg.startswith("--u"): # --utf8
     3046 +                elif arg.startswith(b"--u"): # --utf8
    3047 3047                      error_exit("the --utf8 option is not permitted inside a tokenlist file")
    3048 3048   
    3049 3049   
    3048 3048   
    3049 3049   
    skipped 36 lines
    3086 3086          if hasattr(tokenlist_file, "name") and tokenlist_file.name.startswith(TOKENS_AUTO_FILENAME):
    3087 3087              enable_pause() # enabled by default when using btcrecover-tokens-auto.txt
    3088 3088          # Display a warning if any options (all ignored) were specified in the tokenlist file
    3089  -        if tokenlist_file and tokenlist_file.peek() == "#": # if it's either a comment or additional args
     3089 +        if tokenlist_file and tokenlist_file.peek() == b"#": # if it's either a comment or additional args
    3090 3090              first_line = tokenlist_file.readline()
    3091 3091              tokenlist_first_line_num = 2 # need to pass this to parse_token_list
    3092 3092              if re.match(b"#\s*--", first_line, re.UNICODE): # if it's additional args, not just a comment
    3093  -                print(prog+": warning: all options loaded from restore file; ignoring options in tokenlist file '"+tokenlist_file.name+"'", file=sys.stderr)
     3093 +                print(prog+b": warning: all options loaded from restore file; ignoring options in tokenlist file '"+tokenlist_file.name+b"'", file=sys.stderr)
    3094 3094          print("Using autosave file '"+restore_filename+"'")
    3095 3095          args.skip = savestate[b"skip"] # override this with the most recent value
    3096 3096          restored = True # a global flag for future reference
    skipped 182 lines
    3279 3279      typos_map = None
    3280 3280      if args.typos_map:
    3281 3281          sha1 = hashlib.sha1() if savestate else None
    3282  -        typos_map = parse_mapfile(open_or_use(args.typos_map, "r", kwds.get("typos_map")), sha1, "--typos-map")
     3282 +        typos_map = parse_mapfile(open_or_use(args.typos_map, "r", kwds.get("typos_map")), sha1, b"--typos-map")
    3283 3283          #
    3284 3284          # If autosaving, take the hash of the typos_map and either check it
    3285 3285          # during a session restore to make sure we're actually restoring
    skipped 443 lines
    3729 3729  # running_hash -- (opt.) adds the map's data to the hash object
    3730 3730  # feature_name -- (opt.) used to generate more descriptive error messages
    3731 3731  # same_permitted -- (opt.) if True, the input value may be mapped to the same output value
    3732  -def parse_mapfile(map_file, running_hash = None, feature_name = "map", same_permitted = False):
     3732 +def parse_mapfile(map_file, running_hash = None, feature_name = b"map", same_permitted = False):
    3733 3733      map_data = dict()
    3734 3734      try:
    3735 3735          for line_num, line in enumerate(map_file, 1):
    3736  -            if line.startswith("#"): continue # ignore comments
     3736 +            if line.startswith(b"#"): continue # ignore comments
    3737 3737              #
    3738 3738              # Remove the trailing newline, then split the line exactly
    3739 3739              # once on the specified delimiter (default: whitespace)
    3740 3740              split_line = line.rstrip(tstr("\r\n")).split(args.delimiter, 1)
    3741 3741              if split_line in ([], [tstr('')]): continue # ignore empty lines
    3742 3742              if len(split_line) == 1:
    3743  -                error_exit(feature_name, "file '"+map_file.name+"' has an empty replacement list on line", line_num)
     3743 +                error_exit(feature_name, b"file '"+map_file.name+b"' has an empty replacement list on line", line_num)
    3744 3744              if args.delimiter is None: split_line[1] = split_line[1].rstrip() # ignore trailing whitespace by default
    3745 3745   
    3746  -            check_chars_range(tstr().join(split_line), feature_name + " file" + (" '" + map_file.name + "'" if hasattr(map_file, "name") else ""))
     3746 +            check_chars_range(tstr().join(split_line), feature_name + b" file" + (b" '" + map_file.name + b"'" if hasattr(map_file, "name") else b""))
    3747 3747              for c in split_line[0]: # (c is the character to be replaced)
    3748 3748                  replacements = duplicates_removed(map_data.get(c, tstr()) + split_line[1])
    3749 3749                  if not same_permitted and c in replacements:
    skipped 72 lines
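The `parse_mapfile` function above reads a typos-map: each character in a line's first field maps to the replacement characters in the second. A simplified, bytes-only sketch of that parse (illustrative; the real function also range-checks characters, feeds a running hash for autosave verification, honors a configurable delimiter, and error-exits on malformed lines instead of skipping them):

```python
def parse_map_lines(lines):
    # Simplified sketch of parse_mapfile: comment and malformed/blank
    # lines are skipped; every character in the first field maps to
    # the (whitespace-stripped) second field.
    map_data = {}
    for line in lines:
        if line.startswith(b"#"):        # ignore comments
            continue
        split_line = line.rstrip(b"\r\n").split(None, 1)
        if len(split_line) < 2:          # real code error_exits here
            continue
        chars, replacements = split_line[0], split_line[1].rstrip()
        for i in range(len(chars)):      # one entry per mapped char
            map_data[chars[i:i+1]] = replacements
    return map_data

typos_map = parse_map_lines([b"# typos-map example\n", b"ab @\n", b"e 3\n"])
```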
    3822 3822      MIDDLE     = 3 # has .begin and .end attributes
    3823 3823   
    3824 3824      def __init__(self, token, line_num = "?"):
    3825  -        if token.startswith("^"):
     3825 +        if token.startswith(b"^"):
    3826 3826              # If it is a syntactically correct positional, relative, or middle anchor
    3827 3827              match = re.match(br"\^(?:(?P<begin>\d+)?(?P<middle>,)(?P<end>\d+)?|(?P<rel>[rR])?(?P<pos>\d+))(?:\^|\$)", token)
    3828 3828              if match:
    3829 3829                  # If it's a middle (ranged) anchor
    3830  -                if match.group("middle"):
    3831  -                    begin = match.group("begin")
    3832  -                    end   = match.group("end")
     3830 +                if match.group(b"middle"):
     3831 +                    begin = match.group(b"begin")
     3832 +                    end   = match.group(b"end")
    3833 3833                      cached_str = tstr("^") # begin building the cached __str__
    3834 3834                      if begin is None:
    3835 3835                          begin = 2
    skipped 17 lines
    3853 3853                      self.end = end - 1 if end != sys.maxint else end
    3854 3854                  #
    3855 3855                  # If it's a positional or relative anchor
    3856  -                elif match.group("pos"):
    3857  -                    pos = int(match.group("pos"))
     3856 +                elif match.group(b"pos"):
     3857 +                    pos = int(match.group(b"pos"))
    3858 3858                      cached_str = tstr("^") # begin building the cached __str__
    3859  -                    if match.group("rel"):
     3859 +                    if match.group(b"rel"):
    3860 3860                          cached_str += tstr("r") + tstr(pos) + tstr("^")
    3861 3861                          self.type = AnchoredToken.RELATIVE
    3862 3862                          self.pos = pos
    skipped 12 lines
    3875 3875              #
    3876 3876              # Else it's a begin anchor
    3877 3877              else:
    3878  -                if len(token) > 1 and token[1] in "0123456789,":
     3878 +                if len(token) > 1 and token[1] in b"0123456789,":
    3879 3879                      print(prog+": warning: token on line", line_num, "looks like it might be a positional or middle anchor, " +
    3880 3880                            "but it can't be parsed correctly, so it's assumed to be a simple beginning anchor instead", file=sys.stderr)
    3881  -                if len(token) > 2 and token[1].lower() == "r" and token[2] in "0123456789":
     3881 +                if len(token) > 2 and token[1].lower() == b"r" and token[2] in b"0123456789":
    3882 3882                      print(prog+": warning: token on line", line_num, "looks like it might be a relative anchor, " +
    3883 3883                            "but it can't be parsed correctly, so it's assumed to be a simple beginning anchor instead", file=sys.stderr)
    3884 3884                  cached_str = tstr("^") # begin building the cached __str__
    skipped 1 lines
    3886 3886                  self.pos = 0
    3887 3887                  self.text = token[1:]
    3888 3888              #
    3889  -            if self.text.endswith("$"):
     3889 +            if self.text.endswith(b"$"):
    3890 3890                  error_exit("token on line", line_num, "is anchored with both ^ at the beginning and $ at the end")
    3891 3891              #
    3892 3892              cached_str += self.text # finish building the cached __str__
    3893 3893          #
    3894 3894          # Parse end anchor if present
    3895  -        elif token.endswith("$"):
     3895 +        elif token.endswith(b"$"):
    3896 3896              cached_str = token
    3897 3897              self.type = AnchoredToken.POSITIONAL
    3898 3898              self.pos = b"$"
    skipped 30 lines
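The anchor syntax handled by `AnchoredToken.__init__` is captured entirely by the regex on line 3827. A quick self-contained check of that pattern (written for Python 3, where group names passed to `.group()` must be `str` even for a bytes pattern, unlike the Python 2 `group(b"...")` calls in this diff):

```python
import re

# The anchor-parsing regex from AnchoredToken.__init__, verbatim:
ANCHOR_RE = re.compile(br"\^(?:(?P<begin>\d+)?(?P<middle>,)(?P<end>\d+)?"
                       br"|(?P<rel>[rR])?(?P<pos>\d+))(?:\^|\$)")

m_middle = ANCHOR_RE.match(b"^2,5^token")   # a middle (ranged) anchor
m_rel    = ANCHOR_RE.match(b"^r3^token")    # a relative anchor
m_pos    = ANCHOR_RE.match(b"^4^token")     # a positional anchor
```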
    3929 3929      for line_num, line in enumerate(tokenlist_file, first_line_num):
    3930 3930   
    3931 3931          # Ignore comments
    3932  -        if line.startswith("#"):
     3932 +        if line.startswith(b"#"):
    3933 3933              if re.match(b"#\s*--", line, re.UNICODE):
    3934 3934                  print(prog+": warning: all options must be on the first line, ignoring options on line", unicode(line_num), file=sys.stderr)
    3935 3935              continue
    skipped 12 lines
    3948 3948          # If a "+" is present at the beginning followed by at least one token,
    3949 3949          # then exactly one of the token(s) is required. This is noted in the structure
    3950 3950          # by removing the preceding None we added above (and also delete the "+")
    3951  -        if new_list[1] == "+" and len(new_list) > 2:
     3951 +        if new_list[1] == b"+" and len(new_list) > 2:
    3952 3952              del new_list[0:2]
    3954 3954   # Check token syntax and convert any anchored tokens to an AnchoredToken object
    skipped 12 lines
    3967 3967   
    3968 3968              # Check for tokens which look suspiciously like command line options
    3969 3969              # (using a private ArgumentParser member func is asking for trouble...)
    3970  -            if token.startswith("--") and parser_common._get_option_tuples(token):
     3970 +            if token.startswith(b"--") and parser_common._get_option_tuples(token):
    3971 3971                  if line_num == 1:
    3972 3972                      print(prog+": warning: token on line 1 looks like an option, "
    3973 3973                            "but line 1 did not start like this: #--option1 ...", file=sys.stderr)
    skipped 2 lines
    3976 3976                            " but all options must be on the first line", file=sys.stderr)
    3977 3977   
    3978 3978              # Parse anchor if present and convert to an AnchoredToken object
    3979  -            if token.startswith("^") or token.endswith("$"):
     3979 +            if token.startswith(b"^") or token.endswith(b"$"):
    3980 3980                  token = AnchoredToken(token, line_num) # (the line_num is just for error messages)
    3981 3981                  new_list[i] = token
    3982 3982                  has_any_anchors = True
    skipped 43 lines
    4026 4026              if savestate and not backreference_maps_sha1:
    4027 4027                  backreference_maps_sha1 = hashlib.sha1()
    4028 4028              backreference_maps[map_filename] = \
    4029  -                parse_mapfile(open(map_filename, "r"), backreference_maps_sha1, "backreference map", same_permitted=True)
     4029 +                parse_mapfile(open(map_filename, "r"), backreference_maps_sha1, b"backreference map", same_permitted=True)
    4031 4031   
    4032 4032  ################################### Password Generation ###################################
    skipped 549 lines
    4582 4582          except SystemExit as e:
    4583 4583              passwordlist_warn(line_num, e.code)
    4584 4584              continue
    4585  -        if args.has_wildcards and "%" in password_base:
     4585 +        if args.has_wildcards and b"%" in password_base:
    4586 4586              count_or_error_msg = count_valid_wildcards(password_base, permit_contracting_wildcards=True)
    4587 4587              if isinstance(count_or_error_msg, basestring):
    4588 4588                  passwordlist_warn(line_num, count_or_error_msg)
    skipped 86 lines
    4675 4675              custom_wildcard_cache[(m_custom, m_nocase)] = wildcard_set
    4676 4676          else: # either a "normal" or a contracting wildcard
    4677 4677              m_type = match.group(b"type")
    4678  -            is_expanding = m_type not in "<>-"
     4678 +            is_expanding = m_type not in b"<>-"
    4679 4679              if is_expanding:
    4680 4680                  if m_nocase and m_type in wildcard_nocase_sets:
    4681 4681                      wildcard_set = wildcard_nocase_sets[m_type]
    skipped 82 lines
    4764 4764      else:
    4765 4765          # Determine the max # of characters that can be removed from either the left
    4766 4766          # or the right of the wildcard, not yet taking wildcard_maxlen into account
    4767  -        max_from_left = l_len(password_prefix) if m_type in "<-" else 0
    4768  -        if m_type in ">-":
     4767 +        max_from_left = l_len(password_prefix) if m_type in b"<-" else 0
     4768 +        if m_type in b">-":
    4769 4769              max_from_right = password_postfix_with_wildcards.find("%")
    4770 4770              if max_from_right == -1: max_from_right = l_len(password_postfix_with_wildcards)
    4771 4771          else:
    skipped 799 lines
  • docs/Limitations_and_Caveats.md
    skipped 13 lines
    14 14   
    15 15  Additionally, *btcrecover* considers the following symbols special under certain specific circumstances in the tokenlist file (and for the `#` symbol, also in the typos-map file). A special symbol is part of the syntax, and not part of a password.
    16 16   
    17  - * `%` - always considered special; `%%` in a token will be replaced by `%` during searches
     17 + * `%` - always considered special (see the [Wildcards](../TUTORIAL.md#expanding-wildcards) section); `%%` in a token will be replaced by `%` during searches
    18 18   * `^` - only special if it's the first character of a token; `%^` will be replaced by `^` during searches
    19 19   * `$` - only special if it's the last character of a token; `%S` (note the capital `S`) will be replaced by `$` during searches
    20  - * `#` - only special if it's the very first character on a line, the rest of the line is then ignored (a comment); note that if `#--` is at the very beginning of the tokenlist file, then the first line is parsed as additional command-line options
    21  - * `+` - only special if it's the first token (after possibly stripping whitespace) on a line, followed by a delimiter, and then followed by other token(s) (see the [Mutual Exclusion](../TUTORIAL.md#mutual-exclusion) section); if you need a `+` character in a token, make sure it's either not first on a line, or it's part of a larger token, or it's on a line all by itself
     20 + * `#` - only special if it's the *very first* character on a line, see the [note about comments here](../TUTORIAL.md#basics)
     21 + * `+` - only special if it's the first (not including any spaces) character on a line, immediately followed by a space (or delimiter) and then some tokens (see the [Mutual Exclusion](../TUTORIAL.md#mutual-exclusion) section); if you need a single `+` character as a token, make sure it's not the first token on the line, or it's on a line all by itself
    22 22   
    23 23  None of this applies to passwordlist files, which always treat spaces and symbols (except for carriage-returns and line-feeds) verbatim, treating them as parts of a password.
    24 24   
    skipped 56 lines
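The escape substitutions described in the list above (`%%` → `%`, `%^` → `^`, `%S` → `$`) can be sketched as a tiny scanner (an illustration of the documented behavior only, not btcrecover's actual wildcard engine, which performs these replacements during wildcard expansion):

```python
# The documented escapes: each two-byte sequence becomes one literal byte.
ESCAPES = {b"%%": b"%", b"%^": b"^", b"%S": b"$"}

def unescape(token):
    # Scan left to right, consuming two bytes for an escape sequence
    # and one byte otherwise.
    out, i = b"", 0
    while i < len(token):
        pair = token[i:i+2]
        if pair in ESCAPES:
            out += ESCAPES[pair]
            i += 2
        else:
            out += token[i:i+1]
            i += 1
    return out
```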