diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py index 5cb2840..e8b7aaa 100644 --- a/src/mono/break_mono.py +++ b/src/mono/break_mono.py @@ -7,34 +7,45 @@ Python module to derive a key from an monoalphabetically encrypted file. import re from collections import Counter +#from string import ascii_lowercase - +import pdb class Breaker(): - EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") - def __init__(self, ciphertext, word_file): - - most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] - - # excuse me: + @staticmethod + def read_word_file(word_file): + # excuse me words = [] with open(word_file, 'r') as wf: for line in wf: - words.append(line[:-1]) # remove trailing newline and append + word = line[:-1] # remove trailing newline + word = word.lower() + + if word == "" or not word.isalpha(): + continue + + words.append(word) + return words + + def __init__(self, ciphertext, word_file): + + most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] self.text = ciphertext self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher } - self.words = words + self.words = Breaker.read_word_file(word_file) def choose_word(self): known_chars = self.key.keys() for i in range(len(self.words)): word = self.words[i] - if len(word) == 0: continue + if len(word) == 0: + self.words.pop(i) + continue # count known chars in word n = 0 @@ -42,11 +53,15 @@ class Breaker(): if char in known_chars: n +=1 - # skip known words or words with too many unknown - if n == len(word) or n / len(word) < 0.3: + # remove known words + if n == len(word): + self.words.pop(i) continue - return self.words[i] + # skip words with too many unknown chars + if (n / len(word) < 0.3): continue + + return self.words.pop(i) def translate_and_regex(self, word: str): regex = "" @@ -58,12 +73,16 @@ class Breaker(): return regex def match_ciphertext(self, regex): - rx = re.compile(regex) - count = Counter(re.findall(self.text)) - return count.most_common(1)[0][0] + regexc = re.compile(regex) + count = Counter(regexc.findall(self.text)) + if len(count) == 0: + return "" + else: + return count.most_common(1)[0][0] def extract_unknown(self, plain, regex, cipher): assert len(plain) == len(regex) == len(cipher) + assert "." in regex for i in range(len(regex)): if regex[i] != ".": continue @@ -81,7 +100,11 @@ class Breaker(): word = self.choose_word() regex = self.translate_and_regex(word) cipher = self.match_ciphertext(regex) - print(word, regex, cipher, self.extract_unknown()) + + if cipher == "": continue + + print(word, regex, cipher, + self.extract_unknown(word, regex, cipher)) return self.key_to_str()