fügt diverse Bugfixes hinzu

2020-11-25 02:15:30 +01:00
parent 12cca20112
commit afb6faabbc
1 changed files with 40 additions and 17 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -7,34 +7,45 @@ Python module to derive a key from an monoalphabetically encrypted file.
 import re

 from collections import Counter
+#from string import ascii_lowercase

-
+import pdb

 class Breaker():

-
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

+    @staticmethod
+    def read_word_file(word_file):
+        # excuse me
+        words = []
+        with open(word_file, 'r') as wf:
+            for line in wf:
+                word = line[:-1] # remove trailing newline
+                word = word.lower()
+
+                if word == "" or not word.isalpha():
+                    continue
+
+                words.append(word)
+        return words
+
    def __init__(self, ciphertext, word_file):

        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

-        # excuse me:
-        words = []
-        with open(word_file, 'r') as wf:
-            for line in wf:
-                words.append(line[:-1])  # remove trailing newline and append
-
        self.text = ciphertext
        self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
-        self.words = words
+        self.words = Breaker.read_word_file(word_file)

    def choose_word(self):
        known_chars = self.key.keys()
        for i in range(len(self.words)):
            word = self.words[i]

-            if len(word) == 0: continue
+            if len(word) == 0:
+                self.words.pop(i)
+                continue

            # count known chars in word
            n = 0
@@ -42,11 +53,15 @@ class Breaker():
                if char in known_chars:
                    n +=1

-            # skip known words or words with too many unknown
-            if n == len(word) or n / len(word) < 0.3:
+            # remove known words
+            if n == len(word):
+                self.words.pop(i)
                continue

-            return self.words[i]
+            # skip words with too many unknown chars
+            if (n / len(word) < 0.3): continue
+
+            return self.words.pop(i)

    def translate_and_regex(self, word: str):
        regex = ""
@@ -58,12 +73,16 @@ class Breaker():
        return regex

    def match_ciphertext(self, regex):
-        rx = re.compile(regex)
-        count = Counter(re.findall(self.text))
-        return count.most_common(1)[0][0]
+        regexc = re.compile(regex)
+        count = Counter(regexc.findall(self.text))
+        if len(count) == 0:
+            return ""
+        else:
+            return count.most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        assert len(plain) == len(regex) == len(cipher)
+        assert "." in regex

        for i in range(len(regex)):
            if regex[i] != ".": continue
@@ -81,7 +100,11 @@ class Breaker():
            word   = self.choose_word()
            regex  = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)
-            print(word, regex, cipher, self.extract_unknown())
+
+            if cipher == "": continue
+
+            print(word, regex, cipher,
+                  self.extract_unknown(word, regex, cipher))

        return self.key_to_str()