|  |  | @ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file. | 
			
		
	
		
		
			
				
					
					|  |  |  | import re |  |  |  | import re | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | from collections import Counter |  |  |  | from collections import Counter | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | #from string import ascii_lowercase | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | import pdb | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | class Breaker(): |  |  |  | class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") |  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def __init__(self, ciphertext, word_file): |  |  |  |     @staticmethod | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |     def read_word_file(word_file): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] |  |  |  |         # excuse me | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         # excuse me: |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         words = [] |  |  |  |         words = [] | 
			
		
	
		
		
			
				
					
					|  |  |  |         with open(word_file, 'r') as wf: |  |  |  |         with open(word_file, 'r') as wf: | 
			
		
	
		
		
			
				
					
					|  |  |  |             for line in wf: |  |  |  |             for line in wf: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 words.append(line[:-1])  # remove trailing newline and append |  |  |  |                 word = line[:-1] # remove trailing newline | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 word = word.lower() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 if word == "" or not word.isalpha(): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                     continue | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 words.append(word) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         return words | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def __init__(self, ciphertext, word_file): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.text = ciphertext |  |  |  |         self.text = ciphertext | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher } |  |  |  |         self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher } | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.words = words |  |  |  |         self.words = Breaker.read_word_file(word_file) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def choose_word(self): |  |  |  |     def choose_word(self): | 
			
		
	
		
		
			
				
					
					|  |  |  |         known_chars = self.key.keys() |  |  |  |         known_chars = self.key.keys() | 
			
		
	
		
		
			
				
					
					|  |  |  |         for i in range(len(self.words)): |  |  |  |         for i in range(len(self.words)): | 
			
		
	
		
		
			
				
					
					|  |  |  |             word = self.words[i] |  |  |  |             word = self.words[i] | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             if len(word) == 0: continue |  |  |  |             if len(word) == 0: | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 self.words.pop(i) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             # count known chars in word |  |  |  |             # count known chars in word | 
			
		
	
		
		
			
				
					
					|  |  |  |             n = 0 |  |  |  |             n = 0 | 
			
		
	
	
		
		
			
				
					|  |  | @ -42,11 +53,15 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |                 if char in known_chars: |  |  |  |                 if char in known_chars: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     n +=1 |  |  |  |                     n +=1 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             # skip known words or words with too many unknown |  |  |  |             # remove known words | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |             if n == len(word) or n / len(word) < 0.3: |  |  |  |             if n == len(word): | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 self.words.pop(i) | 
			
		
	
		
		
			
				
					
					|  |  |  |                 continue |  |  |  |                 continue | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |             return self.words[i] |  |  |  |             # skip words with too many unknown chars | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             if (n / len(word) < 0.3): continue | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             return self.words.pop(i) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def translate_and_regex(self, word: str): |  |  |  |     def translate_and_regex(self, word: str): | 
			
		
	
		
		
			
				
					
					|  |  |  |         regex = "" |  |  |  |         regex = "" | 
			
		
	
	
		
		
			
				
					|  |  | @ -58,12 +73,16 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |         return regex |  |  |  |         return regex | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def match_ciphertext(self, regex): |  |  |  |     def match_ciphertext(self, regex): | 
			
		
	
		
		
			
				
					
					|  |  |  |         rx = re.compile(regex) |  |  |  |         regexc = re.compile(regex) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         count = Counter(re.findall(self.text)) |  |  |  |         count = Counter(regexc.findall(self.text)) | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         if len(count) == 0: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             return "" | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         else: | 
			
		
	
		
		
			
				
					
					|  |  |  |             return count.most_common(1)[0][0] |  |  |  |             return count.most_common(1)[0][0] | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def extract_unknown(self, plain, regex, cipher): |  |  |  |     def extract_unknown(self, plain, regex, cipher): | 
			
		
	
		
		
			
				
					
					|  |  |  |         assert len(plain) == len(regex) == len(cipher) |  |  |  |         assert len(plain) == len(regex) == len(cipher) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         assert "." in regex | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         for i in range(len(regex)): |  |  |  |         for i in range(len(regex)): | 
			
		
	
		
		
			
				
					
					|  |  |  |             if regex[i] != ".": continue |  |  |  |             if regex[i] != ".": continue | 
			
		
	
	
		
		
			
				
					|  |  | @ -81,7 +100,11 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |             word   = self.choose_word() |  |  |  |             word   = self.choose_word() | 
			
		
	
		
		
			
				
					
					|  |  |  |             regex  = self.translate_and_regex(word) |  |  |  |             regex  = self.translate_and_regex(word) | 
			
		
	
		
		
			
				
					
					|  |  |  |             cipher = self.match_ciphertext(regex) |  |  |  |             cipher = self.match_ciphertext(regex) | 
			
		
	
		
		
			
				
					
					|  |  |  |             print(word, regex, cipher, self.extract_unknown()) |  |  |  | 
 | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             if cipher == "": continue | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             print(word, regex, cipher, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                   self.extract_unknown(word, regex, cipher)) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         return self.key_to_str() |  |  |  |         return self.key_to_str() | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | 
 |