fügt diverse Bugfixes hinzu

2020-11-25 02:15:30 +01:00
parent 12cca20112
commit afb6faabbc
1 changed files with 40 additions and 17 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file.
 import re
 from collections import Counter
 #from string import ascii_lowercase
-
+import pdb
 class Breaker():
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
    @staticmethod
    def read_word_file(word_file):
        # excuse me
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                word = line[:-1] # remove trailing newline
                word = word.lower()
                if word == "" or not word.isalpha():
                    continue
                words.append(word)
        return words
    def __init__(self, ciphertext, word_file):
        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
        # excuse me:
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                words.append(line[:-1])  # remove trailing newline and append
        self.text = ciphertext
        self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
-        self.words = words
+        self.words = Breaker.read_word_file(word_file)
    def choose_word(self):
        known_chars = self.key.keys()
        for i in range(len(self.words)):
            word = self.words[i]
-            if len(word) == 0: continue
+            if len(word) == 0:
                self.words.pop(i)
                continue
            # count known chars in word
            n = 0
@@ -42,11 +53,15 @@ class Breaker():
                if char in known_chars:
                    n +=1
-            # skip known words or words with too many unknown
+            # remove known words
-            if n == len(word) or n / len(word) < 0.3:
+            if n == len(word):
                self.words.pop(i)
                continue
-            return self.words[i]
+            # skip words with too many unknown chars
            if (n / len(word) < 0.3): continue
            return self.words.pop(i)
    def translate_and_regex(self, word: str):
        regex = ""
@@ -58,12 +73,16 @@ class Breaker():
        return regex
    def match_ciphertext(self, regex):
-        rx = re.compile(regex)
+        regexc = re.compile(regex)
-        count = Counter(re.findall(self.text))
+        count = Counter(regexc.findall(self.text))
        if len(count) == 0:
            return ""
        else:
            return count.most_common(1)[0][0]
    def extract_unknown(self, plain, regex, cipher):
        assert len(plain) == len(regex) == len(cipher)
        assert "." in regex
        for i in range(len(regex)):
            if regex[i] != ".": continue
@@ -81,7 +100,11 @@ class Breaker():
            word   = self.choose_word()
            regex  = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)
-            print(word, regex, cipher, self.extract_unknown())
+
            if cipher == "": continue
            print(word, regex, cipher,
                  self.extract_unknown(word, regex, cipher))
        return self.key_to_str()