# Reconstructed from the whitespace-collapsed patch
#   commit 12cca20112701e9f7e101c319e52b15fe562b150
#   author Daniel Tschertkow, Tue, 24 Nov 2020 23:52:14 +0100
#   subject "break_mono.py wird komplett umgeschrieben"
#           (break_mono.py is completely rewritten)
# Only the post-patch state of src/mono/break_mono.py (from old line 3 on) is
# reproduced here; the code removed by the patch is intentionally not kept.
"""
Python module to derive a key from a monoalphabetically encrypted file.
"""

import re
from collections import Counter


class Breaker:
    """Derive a monoalphabetic substitution key by word-pattern matching.

    The key is grown incrementally: the most frequent ciphertext symbol is
    assumed to stand for the most frequent English letter, then words from a
    word list that are already partly decodable are matched against the
    ciphertext to learn further plaintext -> ciphertext pairs.

    Attributes:
        text:  the ciphertext as passed to the constructor.
        key:   dict mapping plaintext character -> ciphertext character.
        words: candidate plaintext words, in word-file order.
    """

    # English letters, ordered from most to least frequent.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def __init__(self, ciphertext, word_file):
        """Seed the key from letter frequency and load the word list.

        Args:
            ciphertext: the encrypted text (non-empty string).
            word_file:  path of a text file containing one word per line.

        Raises:
            ValueError: if `ciphertext` is empty.
            OSError:    if `word_file` cannot be opened.
        """
        if not ciphertext:
            raise ValueError("ciphertext must not be empty")
        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

        # Strip only the line terminator: slicing off the last character
        # would truncate a final word that has no trailing newline.
        with open(word_file, 'r') as wf:
            words = [line.rstrip("\n") for line in wf]

        self.text = ciphertext
        self.key = {Breaker.EN_LETTER_FREQ[0]: most_freq_cipher}
        self.words = words

    def choose_word(self, skip=()):
        """Return the first word worth matching, or None if there is none.

        A word qualifies when it is non-empty, is not in `skip`, is not yet
        fully decodable, and at least 30 % of its characters are already
        known plaintext characters of the key.

        Args:
            skip: container of words that must not be returned again
                  (defaults to nothing, i.e. the first qualifying word).
        """
        for word in self.words:
            if not word or word in skip:
                continue

            known = sum(1 for char in word if char in self.key)

            # skip known words or words with too many unknown characters
            if known == len(word) or known / len(word) < 0.3:
                continue

            return word
        return None

    def translate_and_regex(self, word: str):
        """Translate `word` into a regex over the ciphertext.

        Known plaintext characters are replaced by their ciphertext
        character, unknown ones by the wildcard ".". The result has exactly
        one pattern character per character of `word`; `extract_unknown`
        relies on that, which is why the ciphertext symbols are not escaped.
        NOTE(review): this assumes ciphertext symbols are not regex
        metacharacters - confirm against the expected input alphabet.
        """
        return "".join(self.key.get(char, ".") for char in word)

    def match_ciphertext(self, regex):
        """Return the most frequent ciphertext snippet matching `regex`.

        Returns None when the pattern does not occur in the ciphertext.
        """
        matches = re.compile(regex).findall(self.text)
        if not matches:
            return None
        return Counter(matches).most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        """Record the key entries revealed by a successful match.

        For every position where `regex` holds the wildcard ".", the pair
        plain[i] -> cipher[i] is stored in the key.

        Args:
            plain:  the plaintext word that was matched.
            regex:  the pattern produced by `translate_and_regex(plain)`.
            cipher: the ciphertext snippet returned by `match_ciphertext`.

        Returns:
            The updated key dict (the same object as `self.key`).

        Raises:
            ValueError: if the three arguments differ in length.
        """
        if not len(plain) == len(regex) == len(cipher):
            raise ValueError("plain, regex and cipher must have equal length")

        for plain_char, pattern_char, cipher_char in zip(plain, regex, cipher):
            if pattern_char == ".":
                self.key[plain_char] = cipher_char

        return self.key

    def key_to_str(self):
        """Return a string rendering of the key.

        NOTE(review): this renders only the plaintext side of the mapping
        (the dict's keys view); the intended output format is not visible
        here, so the behaviour is kept as is - confirm what callers expect.
        """
        return str(self.key.keys())

    def get_key(self):
        """Grow the key until 26 entries are known or no word helps anymore.

        Every chosen word is tried exactly once, so the loop terminates even
        when a word has no match in the ciphertext. Each successful step is
        printed as `word regex cipher key`.

        Returns:
            The result of `key_to_str()`.
        """
        tried = set()
        while len(self.key) < 26:
            word = self.choose_word(skip=tried)
            if word is None:
                break  # the word list has nothing left to offer
            tried.add(word)

            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)
            if cipher is None:
                continue  # pattern absent from the ciphertext; next word

            print(word, regex, cipher,
                  self.extract_unknown(word, regex, cipher))

        return self.key_to_str()

## end Breaker