Fügt Dokumentation hinzu

2020-11-25 03:55:50 +01:00
parent afb6faabbc
commit ed3d03d599
1 changed files with 75 additions and 16 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -2,22 +2,27 @@

 """
 Python module to derive a key from an monoalphabetically encrypted file.
+Does not work yet.
 """

 import re
-
 from collections import Counter
-#from string import ascii_lowercase
-
-import pdb

 class Breaker():
+    """
+    A handle on the various bits of data needed to derive the key from the ciphertext.
+    """

    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    @staticmethod
    def read_word_file(word_file):
-        # excuse me
+        """
+        Helper function to read the words file into memory. The rationale was that
+        querying would be faster and it would be possible to delete used words
+        without mutating the file.
+        """
+
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
@@ -32,6 +37,7 @@ class Breaker():

    def __init__(self, ciphertext, word_file):

+        # count chars in the ciphertext
        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

        self.text = ciphertext
@@ -39,12 +45,18 @@ class Breaker():
        self.words = Breaker.read_word_file(word_file)

    def choose_word(self):
+        """
+        Iterate through the word list and pick a word for pattern matching.
+        Words whose chars are all already known are removed.
+        Words where less than 30% of the chars are known are skipped.
+        """
        known_chars = self.key.keys()
-        for i in range(len(self.words)):
-            word = self.words[i]
+        for word in self.words:
+
+            word = word + ""  # copy

            if len(word) == 0:
-                self.words.pop(i)
+                self.words.remove(word)
                continue

            # count known chars in word
@@ -55,15 +67,22 @@ class Breaker():

            # remove known words
            if n == len(word):
-                self.words.pop(i)
+                self.words.remove(word)
                continue

            # skip words with too many unknown chars
            if (n / len(word) < 0.3): continue

-            return self.words.pop(i)
+            self.words.remove(word)
+            return word
+
+        return ""

    def translate_and_regex(self, word: str):
+        """
+        Prepare chosen word for pattern matching.
+        Translate the known characters and replace the others with a regex '.'
+        """
        regex = ""
        for char in word:
            if char in self.key.keys():
@@ -73,6 +92,11 @@ class Breaker():
        return regex

    def match_ciphertext(self, regex):
+        """
+        Compile the translated chosen word to a regular expression and find all
+        matches inside the ciphertext. Count the occurrences and pick the most
+        frequent one.
+        """
        regexc = re.compile(regex)
        count = Counter(regexc.findall(self.text))
        if len(count) == 0:
@@ -81,6 +105,12 @@ class Breaker():
            return count.most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
+        """
+        Compare the chosen word in its various forms to infer which
+        new characters may be added to the alphabet map.
+        The dots inside `regex' symbolize the positions of unknown characters
+        and provide a mapping between `plain' and `cipher' text.
+        """
        assert len(plain) == len(regex) == len(cipher)
        assert "." in regex

@@ -91,21 +121,50 @@ class Breaker():
        return self.key

    def key_to_str(self):
-        return str(self.key.keys())
+        return "".join(self.key.keys())

    def get_key(self):
-
+        """
+        Strings everything together.
+        Unfortunately, it does not work.
+        """
        while len(self.key.keys()) < 26:

            word   = self.choose_word()
+
+            if word == "": break  # no more words with unknown chars
+
            regex  = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)

            if cipher == "": continue
-
-            print(word, regex, cipher,
-                  self.extract_unknown(word, regex, cipher))
+            self.extract_unknown(word, regex, cipher)

        return self.key_to_str()
-
    ## end Breaker
+
+if __name__ == "__main__":
+    import sys, os
+    import argparse
+
+    # cannot import from a parent package if called directly
+    # without modifying PYTHONPATH or sys.path
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    file_parent_dir = os.path.dirname(file_dir)
+    sys.path.append(file_parent_dir)
+
+    from libex01 import read_text
+
+    def parse_args(sys_argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument("FILE")
+        return parser.parse_args(sys_argv[1:])
+
+
+    args = parse_args(sys.argv)
+    txt = read_text(args.FILE)
+    word_file = "common.txt"
+
+    bm = Breaker(txt, word_file)
+
+    print(bm.get_key())