diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py index ddc7995..58f42c0 100644 --- a/src/mono/break_mono.py +++ b/src/mono/break_mono.py @@ -8,12 +8,13 @@ from collections import OrderedDict from collections import Counter from string import ascii_lowercase -#from libex01 import read_text -def mono_break(enc_txt: str): +class Breaker(): + ## frequency analysis EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") + @staticmethod def get_frequency(text): freq = Counter(text) # Counter with lowercase ascii letters all having a count of 0 @@ -21,88 +22,41 @@ def mono_break(enc_txt: str): freq.update(missing) return freq - def derive_alphabet(freq: Counter): - return OrderedDict(zip(list(freq.keys()), EN_LETTER_FREQ)) - - subs = derive_alphabet(get_frequency(enc_txt)) - dec = "" - for char in enc_txt: - dec += subs[char] - return dec - -## Frequenzanalyse -EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") - -def get_frequency(text): - freq = Counter(text) - # Counter with lowercase ascii letters all having a count of 0 - missing = Counter(dict(it.product(ascii_lowercase, [0]))) - freq.update(missing) - return freq - -def derive_alphabet_freq(freq: Counter): - most_freq = [ item[0] for item in freq.most_common() ] - #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) - return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) - -## Mustersuche -def next_char_anchor(text: str, char): - """ - Generator that takes the text and a char and yields positions of that char. - Adjust starting position by slicing. - Return generator closure. - """ - index = text.find(char) - while index != -1: - yield (index, char) - index = text.find(char, index + 1) - return (index, char) - - -def match_word(text: str, word: str, pos_iter): - """ - Align with anchor and check hypothesis. - First hypothesis is the frequency analysis. - Align `word' with `text' for each anchor. - For remaining anchors check the occurence of pattern - and match with word. If it is true more than once, save it as a - new hypothesis. - """ - pass - -def next_match(text: str, word: str, alphabet: dict, pos_iter: generator): - pass -class Breaker(): + @staticmethod + def derive_alphabet_freq(freq: Counter): + most_freq = [ item[0] for item in freq.most_common() ] + #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) + return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) + + + ## pattern matching + @staticmethod + def get_word_containing(word_file, char_list: list): + """ + Find word from a word list file (common.txt) containing the chars + in `char_list'. + Return None it no word matches or + TUPLE(word, pos) where `pos' is a LIST of matching positions. + """ + with open(word_file, 'r') as f: + for word in f: + pos = [] + for char in char_list: + pos += word.find(char) + if -1 not in pos: + return word, pos + return None - EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") def __init__(self, ciphertext, word_file): self.alph = derive_alphabet_freq(get_frequency(ciphertext)) self.word_file = None # TODO - def get_key(self): - - def get_word_containing(char_list: list): - """ - Find word from a word list file (common.txt) containing the chars - in `char_list'. - Return None it no word matches or - TUPLE(word, pos) where `pos' is a LIST of matching positions. - """ - with open(self.word_file, 'r') as f: - for word in f: - pos = [] - for char in char_list: - pos += word.find(char) - if -1 not in pos: - return word, pos - return None + def get_key(self): most_freq = next(iter(self.alph)) # most frequent char - word_having_char, pos = get_word_containing(most_freq) # unpack - - + word_having_char, pos = get_word_containing(self.word_file, most_freq) pass