alternative version of break mono

doing something, poorly working
2020-11-25 11:28:40 +01:00
parent 30768b4592
commit 0004679a83
1 changed files with 47 additions and 158 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -1,170 +1,59 @@
-#!/usr/bin/env python
+from random import randint as rand
 def randomize_key(key):
    a_index = rand(0, 25)
    b_index = rand(0, 25)
    a = key[a_index]
    b = key[b_index]
    key = key[:a_index] + b + key[a_index + 1:]
    key = key[:b_index] + a + key[b_index + 1:]
-"""
+    return key
 Python module to derive a key from a monoalphabetically encrypted file.
 Does not work yet.
 """
 import re
 from collections import Counter
 class Breaker():
    """
    Dictionary-driven attack on a monoalphabetically encrypted text.
    Attributes:
        text:  the ciphertext exactly as passed to the constructor.
        key:   dict mapping a plaintext letter to the cipher character
               currently believed to encode it.
        words: lower-case candidate plaintext words still available for
               pattern matching.
    """
    # Only index 0 is used by this class; presumably the letters are ordered
    # from most to least frequent in English text.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
    @staticmethod
    def read_word_file(word_file):
        """
        Read the word list at `word_file` into memory.
        Every line is lower-cased; lines that are empty or contain anything
        other than letters are dropped.
        Returns the remaining words as a list, in file order.
        """
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                # Strip only the line terminator. Slicing off the last
                # character would truncate the final word of a file that
                # does not end with a newline.
                word = line.rstrip("\r\n").lower()
                # "".isalpha() is False, so empty lines are rejected too.
                if word.isalpha():
                    words.append(word)
        return words
    def __init__(self, ciphertext, word_file):
        """
        Seed the key with a single guess: the most frequent letter of
        `ciphertext` stands for EN_LETTER_FREQ[0]. Then load the word list.
        Only alphabetic characters are counted, so spaces and punctuation can
        never be mistaken for the most frequent cipher symbol. A ciphertext
        without letters leaves the key empty instead of raising IndexError.
        """
        self.text = ciphertext
        self.key = {}
        letter_counts = Counter(ch for ch in ciphertext if ch.isalpha())
        if letter_counts:
            most_freq_cipher = letter_counts.most_common(1)[0][0]
            self.key[Breaker.EN_LETTER_FREQ[0]] = most_freq_cipher
        self.words = Breaker.read_word_file(word_file)
    def choose_word(self):
        """
        Pick the next word for pattern matching and remove it from the list.
        Walking the list in order:
          * empty words and words whose letters are all known are discarded,
          * words with fewer than 30% known letters are kept for later,
          * the first remaining word is removed from the list and returned.
        Returns "" when no word qualifies.
        """
        known_chars = self.key
        kept = []
        chosen = ""
        # The surviving words are collected in a new list; removing entries
        # from self.words while iterating over it makes the loop skip the
        # element that follows every removal.
        for position, word in enumerate(self.words):
            if not word:
                continue
            n = sum(1 for char in word if char in known_chars)
            if n == len(word):
                continue  # nothing left to learn from this word
            if n / len(word) < 0.3:
                kept.append(word)  # too many unknowns for now
                continue
            chosen = word
            kept.extend(self.words[position + 1:])
            break
        # Update in place so other references to the list stay valid.
        self.words[:] = kept
        return chosen
    def translate_and_regex(self, word: str):
        """
        Prepare a chosen word for pattern matching.
        Known letters are replaced by their cipher character, unknown letters
        by the regex wildcard '.'. The result has the same length as `word`.
        """
        # NOTE(review): cipher characters are inserted verbatim, so a cipher
        # symbol that is a regex metacharacter would change the pattern.
        # Escaping here would break the position-by-position alignment that
        # extract_unknown relies on.
        return "".join(self.key.get(char, ".") for char in word)
    def match_ciphertext(self, regex):
        """
        Find all matches of `regex` in the ciphertext and return the most
        frequent one, or "" if there is no match.
        """
        matches = re.findall(regex, self.text)
        if not matches:
            return ""
        return Counter(matches).most_common(1)[0][0]
    def extract_unknown(self, plain, regex, cipher):
        """
        Learn new key entries from one matched word.
        `plain`, `regex` and `cipher` must have equal length. Every position
        where `regex` holds the wildcard '.' marks an unknown letter, and the
        pair plain[i] -> cipher[i] at that position is stored in the key.
        Returns the updated key dict.
        Raises AssertionError if the lengths differ or `regex` has no '.'.
        """
        assert len(plain) == len(regex) == len(cipher)
        assert "." in regex
        for plain_char, marker, cipher_char in zip(plain, regex, cipher):
            if marker == ".":
                self.key[plain_char] = cipher_char
        return self.key
    def key_to_str(self):
        """
        Return the plaintext letters known so far, joined in the order they
        were added to the key.
        """
        # NOTE(review): this joins the dict keys (plaintext letters), not the
        # cipher characters they map to - confirm the intended key format.
        return "".join(self.key.keys())
    def get_key(self):
        """
        Run the whole attack: repeatedly choose a word, match it against the
        ciphertext and extend the key, until 26 letters are known or no
        usable word is left. Returns the result of key_to_str().
        The original author notes that this does not work yet.
        """
        while len(self.key) < 26:
            word = self.choose_word()
            if word == "":
                break  # no more words with unknown chars
            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)
            if cipher == "":
                continue
            self.extract_unknown(word, regex, cipher)
        return self.key_to_str()
    ## end Breaker
 if __name__ == "__main__":
    import sys, os
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('FILE')
    args = parser.parse_args()
-    # cannot import from a parent package if called directly
+    t = ''.join([x for x in open(args.FILE, "r").read().lower() if x.isalpha()])
-    # without modifying PYTHONPATH or sys.path
+    import os
-    file_dir = os.path.dirname(os.path.abspath(__file__))
+    words = open(os.path.abspath(os.path.dirname(__file__))+"/common.txt", "r").read().split('\n')
    file_parent_dir = os.path.dirname(file_dir)
    sys.path.append(file_parent_dir)
-    from libex01 import read_text
+    occurance_string = 'etaoinsrhdlucmfywgpbvkxqjz'
-    def parse_args(sys_argv):
+    occur_count = {chr(x+0x61): 0 for x in range(26)}
-        parser = argparse.ArgumentParser()
+    for char in t:
-        parser.add_argument("FILE")
+        occur_count[char]+=1
-        return parser.parse_args(sys_argv[1:])
+    mapping = {}
    sorted_occur = sorted(occur_count, key=occur_count.__getitem__, reverse=True)
    for i in range(26):
        mapping[occurance_string[i]]=sorted_occur[i]
    key = ''.join([x for _, x in sorted(mapping.items())])
    import re
    import mono
-    args = parse_args(sys.argv)
+    best_score = 0
-    txt = read_text(args.FILE)
+    best_key = key
    word_file = "common.txt"
-    bm = Breaker(txt, word_file)
+    trys = 0
    # Hill-climb over keys: mutate the current key, keep the mutation when it
    # scores higher, and stop after 1000 consecutive candidates that bring no
    # improvement (`trys` is reset on every improvement).
    while trys < 1000:
        score = 0
        tmp = randomize_key(key)
        plain = mono.mono_decrypt(t, tmp)
        # Score = number of word-list entries found somewhere in the
        # candidate plaintext (each entry is used as a regex pattern).
        for word in words:
            if re.search(word, plain):
                score += 1
        if score > best_score:
            trys = 0
            best_score = score
            # Remember the candidate that actually earned best_score.
            # Storing the previous `key` here left best_key one improvement
            # behind the score that is reported.
            best_key = tmp
            key = tmp
            print(best_score)
        else:
            trys += 1
-    print(bm.get_key())
+    print(best_key)
    print()
    print(mono.mono_decrypt(t, best_key))