break_mono.py wird komplett umgeschrieben

2020-11-24 23:52:14 +01:00
parent 3331177041
commit 12cca20112
1 changed files with 65 additions and 104 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -3,125 +3,86 @@
 """
 Python module to derive a key from a monoalphabetically encrypted file.
 """
-import itertools as it
+
 import re
 from collections import OrderedDict
 from collections import Counter
-from string import ascii_lowercase
+
 class Breaker():
-    ## frequency analysis
+
    # Lowercase English letters as a list. The order matters: index 0 is
    # paired with the most common ciphertext character (__init__), and
    # derive_alphabet_freq zips the list against Counter.most_common().
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
    @staticmethod
    def get_frequency(text):
        freq = Counter(text)
        # Counter with lowercase ascii letters all having a count of 0
        missing = Counter(dict(it.product(ascii_lowercase, [0])))
        freq.update(missing)
        return freq
    @staticmethod
    def derive_alphabet_freq(freq: Counter):
        most_freq = [ item[0] for item in freq.most_common() ]
        #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
        return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq))
    ## pattern matching
    @staticmethod
    def get_word_containing(word_file, char_list: list):
        """
        Find word from a word list file (common.txt) containing the chars
        in `char_list'.
        Return None it no word matches or
        TUPLE(word, pos) where `pos' is a LIST of matching positions.
        """
        with open(word_file, 'r') as f:
            for line in f:
                word = line[:-1]
                pos = []
                for char in char_list:
                    pos.append(word.find(char))
                if -1 not in pos:
                    return word, pos
        return None, None
    @staticmethod
    def positions(text: str, sub):
        index = text.find(sub)
        while index != -1:
            yield index
            index = text.find(sub, index + 1)
        return index
    @staticmethod
    def match_ciphertext(text: str, word_pos: tuple, char: tuple):
        """
        Find the most frequent ciphertext snippet that could encode a word.

        `word_pos' is TUPLE(word, pos) as returned by get_word_containing;
        only the first entry of `pos' is used.  Every occurrence of `char'
        in `text' is aligned with that position and the len(word) characters
        around it form a candidate snippet.  Candidates that would start
        before the beginning of `text' or run past its end are skipped.

        `char' is handed to str.find (via Breaker.positions), so it has to
        be a string despite the annotation.

        Return the candidate that occurs most often in `text'.
        Raise IndexError if there is no candidate at all.
        """
        word, wposl = word_pos
        wpos = wposl[0]
        wlen = len(word)
        snip_count = Counter()
        for pos in Breaker.positions(text, char):
            word_begin = pos - wpos
            # A negative start would make the slice wrap around to the end
            # of the text (usually yielding "", which str.count reports
            # len(text) + 1 times); an end beyond the text would silently
            # truncate the snippet.  Neither can encode the whole word.
            if word_begin < 0 or word_begin + wlen > len(text):
                continue
            snippet = text[word_begin : word_begin + wlen]
            # Count each distinct snippet once; a key lookup avoids
            # expanding Counter.elements() on every iteration.
            if snippet not in snip_count:
                snip_count[snippet] = text.count(snippet)
        return snip_count.most_common(1)[0][0]
    @staticmethod
    def choose_known_letters(key_alphabet):
        letters = list(key_alphabet.keys())
        if len(key_alphabet) < 3:
            yield letters
        else:
            for i in letters:
                for j in letters:
                    for k in letters:
                        if k == j or k == i or j == i: continue
                        yield [i, j, k]
        return None
    def __init__(self, ciphertext, word_file):
        self.ciphertext = ciphertext
        self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
        self.word_file = word_file
        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
        # excuse me:
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                words.append(line[:-1])  # remove trailing newline and append
        self.text = ciphertext
        self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
        self.words = words
    def choose_word(self):
        known_chars = self.key.keys()
        for i in range(len(self.words)):
            word = self.words[i]
            if len(word) == 0: continue
            # count known chars in word
            n = 0
            for char in word:
                if char in known_chars:
                    n +=1
            # skip known words or words with too many unknown
            if n == len(word) or n / len(word) < 0.3:
                continue
            return self.words[i]
    def translate_and_regex(self, word: str):
        regex = ""
        for char in word:
            if char in self.key.keys():
                regex = regex + self.key[char]
            else:
                regex = regex + "."
        return regex
    def match_ciphertext(self, regex):
        rx = re.compile(regex)
        count = Counter(re.findall(self.text))
        return count.most_common(1)[0][0]
    def extract_unknown(self, plain, regex, cipher):
        assert len(plain) == len(regex) == len(cipher)
        for i in range(len(regex)):
            if regex[i] != ".": continue
            self.key.update({ plain[i] : cipher[i] })
        return self.key
    def key_to_str(self):
        return str(self.key.keys())
    def get_key(self):
        # Derive the key: the `+' lines loop until self.key has 26 entries,
        # each round choosing a word, translating it to a pattern, matching
        # it against the ciphertext and printing the result, then return
        # key_to_str().
        #
        # This span is an interleaved diff hunk: `-' lines belong to the
        # removed implementation, `+' lines to its replacement.
        # NOTE(review): `self.extract_unknown()' is called without arguments
        # below, but the method defined in this class takes
        # (plain, regex, cipher) -- confirm the intended call.
        # NOTE(review): the second `while' loop follows the added `return'
        # and uses `key_alphabet' / `most_freq', which are assigned only on
        # `-' lines; presumably it is removed code whose diff markers were
        # lost -- confirm against the committed file.
-        key_alphabet = OrderedDict()
+        while len(self.key.keys()) < 26:
-        # most frequent char in English and corresponding most common char in text
+            word   = self.choose_word()
-        most_freq = self.alph[Breaker.EN_LETTER_FREQ]
+            regex  = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)
            print(word, regex, cipher, self.extract_unknown())
-        key_alphabet[Breaker.EN_LETTER_FREQ[0]] = most_freq
+        return self.key_to_str()
        while len(key_alphabet) < 26:
            word_pos = Breaker.get_word_containing(
                self.word_file,
                next(Breaker.choose_known_letters(key_alphabet))
            )
            pass
            most_common = Breaker.match_ciphertext(
                self.ciphertext,
                word_pos,
                most_freq
            )
        pass
    ## end Breaker