|  |  | @ -2,22 +2,27 @@ | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | """ |  |  |  | """ | 
			
		
	
		
		
			
				
					
					|  |  |  | Python module to derive a key from an monoalphabetically encrypted file. |  |  |  | Python module to derive a key from an monoalphabetically encrypted file. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | Does not work yet. | 
			
		
	
		
		
			
				
					
					|  |  |  | """ |  |  |  | """ | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | import re |  |  |  | import re | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | from collections import Counter |  |  |  | from collections import Counter | 
			
		
	
		
		
			
				
					
					|  |  |  | #from string import ascii_lowercase |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | import pdb |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | class Breaker(): |  |  |  | class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     A handle on the various bits of data needed to derive the key from the ciphertext. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     """ | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") |  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     @staticmethod |  |  |  |     @staticmethod | 
			
		
	
		
		
			
				
					
					|  |  |  |     def read_word_file(word_file): |  |  |  |     def read_word_file(word_file): | 
			
		
	
		
		
			
				
					
					|  |  |  |         # excuse me |  |  |  |         """ | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Helper function to read the words file into memory. The rationale was that | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         querying would be faster and it would be possible to delete used words | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         without mutating the file. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         words = [] |  |  |  |         words = [] | 
			
		
	
		
		
			
				
					
					|  |  |  |         with open(word_file, 'r') as wf: |  |  |  |         with open(word_file, 'r') as wf: | 
			
		
	
		
		
			
				
					
					|  |  |  |             for line in wf: |  |  |  |             for line in wf: | 
			
		
	
	
		
		
			
				
					|  |  | @ -32,6 +37,7 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def __init__(self, ciphertext, word_file): |  |  |  |     def __init__(self, ciphertext, word_file): | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         # count chars in the ciphertext | 
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] |  |  |  |         most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.text = ciphertext |  |  |  |         self.text = ciphertext | 
			
		
	
	
		
		
			
				
					|  |  | @ -39,12 +45,18 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.words = Breaker.read_word_file(word_file) |  |  |  |         self.words = Breaker.read_word_file(word_file) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def choose_word(self): |  |  |  |     def choose_word(self): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Iterate through the word list and pick a word for pattern matching. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Words with chars that are completely known are being removed. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Words where less than a third of the chars are known are being skipped. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         known_chars = self.key.keys() |  |  |  |         known_chars = self.key.keys() | 
			
		
	
		
		
			
				
					
					|  |  |  |         for i in range(len(self.words)): |  |  |  |         for word in self.words: | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             word = self.words[i] |  |  |  | 
 | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             word = word + ""  # copy | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             if len(word) == 0: |  |  |  |             if len(word) == 0: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 self.words.pop(i) |  |  |  |                 self.words.remove(word) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |                 continue |  |  |  |                 continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             # count known chars in word |  |  |  |             # count known chars in word | 
			
		
	
	
		
		
			
				
					|  |  | @ -55,15 +67,22 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             # remove known words |  |  |  |             # remove known words | 
			
		
	
		
		
			
				
					
					|  |  |  |             if n == len(word): |  |  |  |             if n == len(word): | 
			
		
	
		
		
			
				
					
					|  |  |  |                 self.words.pop(i) |  |  |  |                 self.words.remove(word) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |                 continue |  |  |  |                 continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             # skip words with too many unknown chars |  |  |  |             # skip words with too many unknown chars | 
			
		
	
		
		
			
				
					
					|  |  |  |             if (n / len(word) < 0.3): continue |  |  |  |             if (n / len(word) < 0.3): continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             return self.words.pop(i) |  |  |  |             self.words.remove(word) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             return word | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         return "" | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def translate_and_regex(self, word: str): |  |  |  |     def translate_and_regex(self, word: str): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Prepare chosen word for pattern matching. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Translate the known characters and replace the others with a regex '.' | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         regex = "" |  |  |  |         regex = "" | 
			
		
	
		
		
			
				
					
					|  |  |  |         for char in word: |  |  |  |         for char in word: | 
			
		
	
		
		
			
				
					
					|  |  |  |             if char in self.key.keys(): |  |  |  |             if char in self.key.keys(): | 
			
		
	
	
		
		
			
				
					|  |  | @ -73,6 +92,11 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |         return regex |  |  |  |         return regex | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def match_ciphertext(self, regex): |  |  |  |     def match_ciphertext(self, regex): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Compile the translated chosen word to a regular expression and find all | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         matches inside the ciphertext. Count the occurrences and pick the most | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         frequent one. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         regexc = re.compile(regex) |  |  |  |         regexc = re.compile(regex) | 
			
		
	
		
		
			
				
					
					|  |  |  |         count = Counter(regexc.findall(self.text)) |  |  |  |         count = Counter(regexc.findall(self.text)) | 
			
		
	
		
		
			
				
					
					|  |  |  |         if len(count) == 0: |  |  |  |         if len(count) == 0: | 
			
		
	
	
		
		
			
				
					|  |  | @ -81,6 +105,12 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |             return count.most_common(1)[0][0] |  |  |  |             return count.most_common(1)[0][0] | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def extract_unknown(self, plain, regex, cipher): |  |  |  |     def extract_unknown(self, plain, regex, cipher): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Compare the chosen word in its various forms to infer which | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         new characters may be added to the alphabet map. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         The dots inside `regex' symbolize the positions of unknown characters | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         and provide a mapping between `plain' and `cipher' text. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         assert len(plain) == len(regex) == len(cipher) |  |  |  |         assert len(plain) == len(regex) == len(cipher) | 
			
		
	
		
		
			
				
					
					|  |  |  |         assert "." in regex |  |  |  |         assert "." in regex | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -91,21 +121,50 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |         return self.key |  |  |  |         return self.key | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def key_to_str(self): |  |  |  |     def key_to_str(self): | 
			
		
	
		
		
			
				
					
					|  |  |  |         return str(self.key.keys()) |  |  |  |         return "".join(self.key.keys()) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def get_key(self): |  |  |  |     def get_key(self): | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |         """ | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Strings everything together. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         Unfortunatly it does not work. | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         while len(self.key.keys()) < 26: |  |  |  |         while len(self.key.keys()) < 26: | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             word   = self.choose_word() |  |  |  |             word   = self.choose_word() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             if word == "": break  # no more words with unknown chars | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             regex  = self.translate_and_regex(word) |  |  |  |             regex  = self.translate_and_regex(word) | 
			
		
	
		
		
			
				
					
					|  |  |  |             cipher = self.match_ciphertext(regex) |  |  |  |             cipher = self.match_ciphertext(regex) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             if cipher == "": continue |  |  |  |             if cipher == "": continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |             self.extract_unknown(word, regex, cipher) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             print(word, regex, cipher, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |                   self.extract_unknown(word, regex, cipher)) |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         return self.key_to_str() |  |  |  |         return self.key_to_str() | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     ## end Breaker |  |  |  |     ## end Breaker | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | if __name__ == "__main__": | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     import sys, os | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     import argparse | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     # cannot import from a parent package if called directly | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     # without modifying PYTHONPATH or sys.path | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     file_dir = os.path.dirname(os.path.abspath(__file__)) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     file_parent_dir = os.path.dirname(file_dir) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     sys.path.append(file_parent_dir) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     from libex01 import read_text | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def parse_args(sys_argv): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         parser = argparse.ArgumentParser() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         parser.add_argument("FILE") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         return parser.parse_args(sys_argv[1:]) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     args = parse_args(sys.argv) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     txt = read_text(args.FILE) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     word_file = "common.txt" | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     bm = Breaker(txt, word_file) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     print(bm.get_key()) | 
			
		
	
	
		
		
			
				
					|  |  | 
 |