Merge branch 'break_mono'
						commit
						ecb3157a91
					
				| @ -0,0 +1,170 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | Python module to derive a key from a monoalphabetically encrypted file. | ||||||
|  | Does not work yet. | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | import re | ||||||
|  | from collections import Counter | ||||||
|  | 
 | ||||||
class Breaker():
    """
    A handle on the various bits of data needed to derive the key from the
    ciphertext.

    Instance attributes:
      text  -- the ciphertext exactly as passed to the constructor
      key   -- dict mapping a plaintext character to its cipher character
      words -- list of candidate plaintext words still available for matching
    """

    # Plaintext letters in the order used for the initial frequency guess:
    # index 0 is paired with the most frequent ciphertext character.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    # Minimum fraction of a word's characters that must already be in the key
    # before the word is used for pattern matching.
    MIN_KNOWN_RATIO = 0.3

    @staticmethod
    def read_word_file(word_file):
        """
        Helper function to read the words file into memory. The rationale was
        that querying would be faster and it would be possible to delete used
        words without mutating the file.

        `word_file' is the path of a text file with one word per line.
        Returns the lower-cased words as a list, in file order. Empty lines
        and entries containing non-alphabetic characters are left out.
        """
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                # Strip only the line terminator: slicing off the last
                # character would truncate a final line without a newline.
                word = line.rstrip("\r\n").lower()

                # "".isalpha() is False, so empty lines are rejected as well
                if word.isalpha():
                    words.append(word)
        return words

    def __init__(self, ciphertext, word_file):
        """
        Store the ciphertext, load the word list from `word_file' and seed
        the key with a single frequency based guess.
        """
        # Count the characters of the ciphertext. Whitespace is ignored,
        # otherwise the blank between words wins the frequency count.
        counts = Counter(char for char in ciphertext if not char.isspace())

        self.text = ciphertext
        self.key = {}
        if counts:  # nothing to guess from for empty / all-blank input
            most_freq_cipher = counts.most_common(1)[0][0]
            self.key[Breaker.EN_LETTER_FREQ[0]] = most_freq_cipher
        self.words = Breaker.read_word_file(word_file)

    def choose_word(self):
        """
        Iterate through the word list and pick a word for pattern matching.
        Empty words and words whose chars are all known are removed.
        Words where less than MIN_KNOWN_RATIO of the chars are known are
        skipped but stay in the list for a later call.

        The chosen word is removed from the list and returned; an empty
        string is returned if no word qualifies.
        """
        remaining = []
        chosen = ""

        # The list is rebuilt instead of calling remove() during iteration,
        # which would make the loop jump over the entry following each
        # removed word.
        for index, word in enumerate(self.words):
            if not word:
                continue  # drop empty entries

            known = sum(1 for char in word if char in self.key)

            if known == len(word):
                continue  # drop words that cannot teach anything new

            if known / len(word) < self.MIN_KNOWN_RATIO:
                remaining.append(word)  # too many unknown chars for now
                continue

            chosen = word
            remaining.extend(self.words[index + 1:])  # rest stays untouched
            break

        # slice assignment keeps the identity of the list object
        self.words[:] = remaining
        return chosen

    def translate_and_regex(self, word: str):
        """
        Prepare chosen word for pattern matching.
        Translate the known characters and replace the others with a regex '.'

        Known cipher characters are passed through re.escape so that a cipher
        character which is a regex metacharacter is matched literally.
        Returns the pattern as a string.
        """
        return "".join(
            re.escape(self.key[char]) if char in self.key else "."
            for char in word
        )

    def match_ciphertext(self, regex):
        """
        Compile the translated chosen word to a regular expression and find
        all matches inside the ciphertext. Count the occurrences and pick the
        most frequent one.

        Returns the most frequent match or an empty string if there is none.
        """
        count = Counter(re.compile(regex).findall(self.text))
        if not count:
            return ""
        return count.most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        """
        Compare the chosen word in its various forms to infer which new
        characters may be added to the alphabet map.
        The dots inside `regex' symbolize the positions of unknown characters
        and provide a mapping between `plain' and `cipher' text.

        Existing entries for the same plaintext character are overwritten and
        no check is made whether the cipher character is already assigned to
        another plaintext character.
        Returns the updated key dict.
        """
        # One token per character position: either a single character or a
        # backslash escape as produced by translate_and_regex().
        tokens = re.findall(r"\\.|.", regex, flags=re.DOTALL)

        assert len(plain) == len(tokens) == len(cipher)
        assert "." in tokens

        for plain_char, token, cipher_char in zip(plain, tokens, cipher):
            if token == ".":
                self.key[plain_char] = cipher_char

        return self.key

    def key_to_str(self):
        """
        Return the plaintext characters stored in the key, joined in
        insertion order.
        """
        # NOTE(review): only the plaintext side of the map is emitted, the
        # cipher characters (the dict values) are not part of the result --
        # confirm the intended key format.
        return "".join(self.key)

    def get_key(self):
        """
        Strings everything together: choose a word, match it against the
        ciphertext and add the newly learned characters to the key until
        26 characters are known or the word list has nothing left to offer.

        Returns the result of key_to_str().
        """
        while len(self.key) < 26:

            word = self.choose_word()

            if word == "":
                break  # no more words with unknown chars

            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)

            if cipher == "":
                continue  # pattern not found in ciphertext, try next word
            self.extract_unknown(word, regex, cipher)

        return self.key_to_str()
    ## end Breaker
|  | 
 | ||||||
if __name__ == "__main__":
    import argparse
    import os
    import sys

    # A directly executed file has no parent package to import from, so the
    # directory above this file's directory is appended to sys.path before
    # the project-local import below.
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(parent_dir)

    from libex01 import read_text

    def parse_args(sys_argv):
        """Parse the command line; the only argument is the positional FILE."""
        cli = argparse.ArgumentParser()
        cli.add_argument("FILE")
        return cli.parse_args(sys_argv[1:])

    options = parse_args(sys.argv)

    # read_text comes from libex01; presumably it returns the content of the
    # given file as a string -- verify against libex01.
    ciphertext = read_text(options.FILE)

    # the word list is looked up relative to the current working directory
    breaker = Breaker(ciphertext, "common.txt")

    print(breaker.get_key())
					Loading…
					
					
				
		Reference in New Issue