parent
							
								
									30768b4592
								
							
						
					
					
						commit
						0004679a83
					
				| @ -1,170 +1,59 @@ | |||||||
| #!/usr/bin/env python | from random import randint as rand | ||||||
def randomize_key(key):
    """
    Return a copy of `key` with two randomly chosen positions swapped.

    `key` is a string (for the script in this file, a 26-letter substitution
    alphabet). The positions are drawn over the whole length of `key` and are
    always distinct, so every call on a key of two or more characters yields
    a key that differs from the input in exactly two places. Keys shorter
    than two characters are returned unchanged.
    """
    size = len(key)
    if size < 2:
        # nothing to swap
        return key

    first = rand(0, size - 1)
    # draw the second position from the remaining size - 1 slots so that it
    # can never coincide with the first one (a no-op swap wastes an attempt)
    second = rand(0, size - 2)
    if second >= first:
        second += 1

    letters = list(key)
    letters[first], letters[second] = letters[second], letters[first]
    return "".join(letters)
| Python module to derive a key from a monoalphabetically encrypted file. |  | ||||||
| Does not work yet. |  | ||||||
| """ |  | ||||||
| 
 | 
 | ||||||
| import re |  | ||||||
| from collections import Counter |  | ||||||
| 
 |  | ||||||
class Breaker():
    """
    A handle on the various bits of data needed to derive the key from the
    ciphertext.

    Attributes:
        text:  the ciphertext that is searched for word patterns.
        key:   dict mapping a plaintext letter to the cipher letter guessed
               for it; grows as more words are matched.
        words: candidate plaintext words, consumed by `choose_word`.

    The original author marked this approach as not working yet; the
    guessing heuristic itself is unchanged here.
    """

    # letters ordered by frequency; only the first entry is used, as the
    # plaintext guess for the most common ciphertext character
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    @staticmethod
    def read_word_file(word_file):
        """
        Read the words file into memory, one word per line.

        Words are lower-cased; empty lines and entries containing anything
        other than letters are skipped. Keeping the list in memory makes it
        possible to drop used words without mutating the file.
        """
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                # strip only the newline: slicing off the last character
                # would truncate a final line that has no trailing newline
                word = line.rstrip("\n").lower()

                # "".isalpha() is False, so empty lines are skipped as well
                if word.isalpha():
                    words.append(word)
        return words

    def __init__(self, ciphertext, word_file):
        """
        Seed the key with a single guess: the most frequent ciphertext
        character stands for the most frequent letter of EN_LETTER_FREQ.
        An empty ciphertext yields an empty key instead of an IndexError.
        """
        most_common = Counter(ciphertext).most_common(1)

        self.text = ciphertext
        if most_common:
            self.key = {Breaker.EN_LETTER_FREQ[0]: most_common[0][0]}
        else:
            self.key = {}
        self.words = Breaker.read_word_file(word_file)

    def choose_word(self):
        """
        Pick the next word for pattern matching and remove it from the list.

        Empty words and words whose chars are all known are dropped.
        Words where less than 30% of the chars are known are skipped but
        kept for a later call. Returns "" when no word qualifies.
        """
        kept = []
        chosen = ""

        # Walk the list by index and rebuild it afterwards. Removing entries
        # from the list while iterating over it makes the iterator skip the
        # element that follows each removal.
        for index, word in enumerate(self.words):
            if not word:
                continue  # drop empty entries

            known = sum(1 for char in word if char in self.key)

            if known == len(word):
                continue  # nothing left to learn from this word

            if known / len(word) < 0.3:
                kept.append(word)  # too many unknown chars for now
                continue

            chosen = word
            kept.extend(self.words[index + 1:])
            break

        # update in place so other references to the list stay valid
        self.words[:] = kept
        return chosen

    def translate_and_regex(self, word: str):
        """
        Prepare chosen word for pattern matching.
        Translate the known characters (escaped, so they always match
        literally) and replace the others with a regex '.'
        """
        parts = []
        for char in word:
            if char in self.key:
                parts.append(re.escape(self.key[char]))
            else:
                parts.append(".")
        return "".join(parts)

    def match_ciphertext(self, regex):
        """
        Compile the translated chosen word to a regular expression and find
        all non-overlapping matches inside the ciphertext. Count the
        occurrences and return the most frequent one, or "" if there is no
        match.
        """
        counts = Counter(re.compile(regex).findall(self.text))
        if not counts:
            return ""
        return counts.most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        """
        Compare the chosen word in its plaintext and ciphertext forms to
        infer which new characters may be added to the alphabet map.

        Every position of `plain` whose letter is not yet in the key is
        mapped to the character at the same position of `cipher`. `regex` is
        accepted for compatibility only: the unknown positions are taken
        from the key because an escaped pattern no longer lines up with
        `plain` character by character, and a literal "." cipher character
        would be mistaken for a wildcard.

        Returns the updated key dict.
        """
        assert len(plain) == len(cipher)

        # snapshot the positions first so a letter that occurs twice is
        # treated as unknown at both positions (the later one wins)
        unknown = [i for i, char in enumerate(plain) if char not in self.key]
        assert unknown

        for i in unknown:
            self.key[plain[i]] = cipher[i]

        return self.key

    def key_to_str(self):
        """
        Return the plaintext letters known so far, in the order they were
        added to the key.
        """
        # NOTE(review): this joins the dict's keys (plaintext letters), not
        # the cipher letters they map to - confirm the intended key format.
        return "".join(self.key.keys())

    def get_key(self):
        """
        Strings everything together: keep choosing words, matching them
        against the ciphertext and extending the key until 26 letters are
        known or the word list has no usable word left.
        """
        while len(self.key) < 26:

            word = self.choose_word()

            if not word:
                break  # no more words with unknown chars

            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)

            if not cipher:
                continue  # pattern not found; the word is already consumed
            self.extract_unknown(word, regex, cipher)

        return self.key_to_str()
    ## end Breaker
| 
 | 
 | ||||||
# English letters ordered from most to least frequent.
ENGLISH_BY_FREQUENCY = 'etaoinsrhdlucmfywgpbvkxqjz'
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
# Give up after this many consecutive swaps that did not improve the score.
MAX_STALE_TRIES = 1000


def frequency_key(text):
    """
    Build a first-guess key from the letter frequencies of `text`.

    The most frequent letter of `text` is paired with the first letter of
    ENGLISH_BY_FREQUENCY, the second most frequent with the second, and so
    on (ties keep alphabetical order). The result is a 26-character string
    whose i-th character is the text letter paired with the i-th letter of
    the alphabet. Characters outside a-z are ignored.
    """
    counts = {letter: 0 for letter in ALPHABET}
    for char in text:
        if char in counts:
            counts[char] += 1

    # sorted() is stable, also with reverse=True, so equal counts stay a..z
    by_count = sorted(counts, key=counts.__getitem__, reverse=True)
    mapping = dict(zip(ENGLISH_BY_FREQUENCY, by_count))
    return ''.join(mapping[letter] for letter in ALPHABET)


def count_known_words(plain, words):
    """
    Return how many entries of `words` occur as a substring of `plain`.

    Empty entries are not counted: they would match every text and only add
    a constant to the score.
    """
    return sum(1 for word in words if word and word in plain)


def main():
    """
    Guess the key of the file given on the command line by hill climbing:
    start from the frequency-based key, repeatedly swap two letters, and keep
    a swap whenever the decrypted text contains more words from common.txt.
    Prints each improved score, then the best key and the text decrypted
    with it.
    """
    import argparse
    import os

    # project-local module; assumed to provide mono_decrypt(text, key)
    import mono

    parser = argparse.ArgumentParser()
    parser.add_argument('FILE')
    args = parser.parse_args()

    with open(args.FILE, "r") as handle:
        raw = handle.read().lower()
    # keep a-z only: other "alphabetic" characters (e.g. accented letters)
    # have no slot in the 26-letter key
    text = ''.join(char for char in raw if char in ALPHABET)

    word_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "common.txt")
    with open(word_path, "r") as handle:
        words = [word for word in handle.read().lower().split('\n') if word]

    best_key = frequency_key(text)
    # score the starting point too, so a first swap is only accepted when it
    # is a real improvement
    best_score = count_known_words(mono.mono_decrypt(text, best_key), words)

    stale = 0
    while stale < MAX_STALE_TRIES:
        candidate = randomize_key(best_key)
        score = count_known_words(mono.mono_decrypt(text, candidate), words)
        if score > best_score:
            # remember the key that actually produced the new best score
            best_key, best_score = candidate, score
            stale = 0
            print(best_score)
        else:
            stale += 1

    print(best_key)
    print()
    print(mono.mono_decrypt(text, best_key))


if __name__ == "__main__":
    main()
|  | |||||||
					Loading…
					
					
				
		Reference in New Issue