From 0004679a83e01ca3bb007f167a5eacf55d5f7f31 Mon Sep 17 00:00:00 2001
From: Eggert Jung <eggert.s.jung@wischhof13.de>
Date: Wed, 25 Nov 2020 11:28:40 +0100
Subject: [PATCH] alternative version of break mono

doing something, poorly working
---
 src/mono/break_mono.py | 217 ++++++++++++-------------------------------------
 1 file changed, 53 insertions(+), 164 deletions(-)
 mode change 100755 => 100644 src/mono/break_mono.py

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
old mode 100755
new mode 100644
index 4bb44a1..fc92e4c
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -1,170 +1,59 @@
-#!/usr/bin/env python
+from random import randint as rand
+def randomize_key(key):
+    a_index = rand(0, 25)
+    b_index = rand(0, 25)
+    a = key[a_index]
+    b = key[b_index]
+    key = key[:a_index] + b + key[a_index + 1:]
+    key = key[:b_index] + a + key[b_index + 1:]
 
-"""
-Python module to derive a key from an monoalphabetically encrypted file.
-Does not work yet.
-"""
+    return key
 
-import re
-from collections import Counter
-
-class Breaker():
-    """
-    A handle on the various bits of data needed to derive the key from the ciphertext.
-    """
-
-    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
-
-    @staticmethod
-    def read_word_file(word_file):
-        """
-        Helper function to read the words file into memory. The rationale was that
-        querying would be faster and it would be possible to delete used words
-        without mutating the file.
-        """
-
-        words = []
-        with open(word_file, 'r') as wf:
-            for line in wf:
-                word = line[:-1] # remove trailing newline
-                word = word.lower()
-
-                if word == "" or not word.isalpha():
-                    continue
-
-                words.append(word)
-        return words
-
-    def __init__(self, ciphertext, word_file):
-
-        # count chars in the ciphertext
-        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
-
-        self.text = ciphertext
-        self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
-        self.words = Breaker.read_word_file(word_file)
-
-    def choose_word(self):
-        """
-        Iterate through the word list and pick a word for pattern matching.
-        Words with chars that are completely known are are being removed.
-        Words where less then a third of the chars are known are being skipped.
-        """
-        known_chars = self.key.keys()
-        for word in self.words:
-
-            word = word + ""  # copy
-
-            if len(word) == 0:
-                self.words.remove(word)
-                continue
-
-            # count known chars in word
-            n = 0
-            for char in word:
-                if char in known_chars:
-                    n +=1
-
-            # remove known words
-            if n == len(word):
-                self.words.remove(word)
-                continue
-
-            # skip words with too many unknown chars
-            if (n / len(word) < 0.3): continue
-
-            self.words.remove(word)
-            return word
-
-        return ""
-
-    def translate_and_regex(self, word: str):
-        """
-        Prepare chosen word for pattern matching.
-        Translate the known characters and replace the others with a regex '.'
-        """
-        regex = ""
-        for char in word:
-            if char in self.key.keys():
-                regex = regex + self.key[char]
-            else:
-                regex = regex + "."
-        return regex
-
-    def match_ciphertext(self, regex):
-        """
-        Compile the translated chosen word to a regular expression and find all
-        matches inside the ciphertext. Count the occurences and pick the most
-        frequent one.
-        """
-        regexc = re.compile(regex)
-        count = Counter(regexc.findall(self.text))
-        if len(count) == 0:
-            return ""
-        else:
-            return count.most_common(1)[0][0]
-
-    def extract_unknown(self, plain, regex, cipher):
-        """
-        Compare the the chosen words in it's various forms to infer which
-        new characters may be added to the alphabet map.
-        The dots inside `regex' symbolize the positions of unknown characters
-        and provide a mapping between `plain' and `cipher' text.
-        """
-        assert len(plain) == len(regex) == len(cipher)
-        assert "." in regex
-
-        for i in range(len(regex)):
-            if regex[i] != ".": continue
-            self.key.update({ plain[i] : cipher[i] })
-
-        return self.key
-
-    def key_to_str(self):
-        return "".join(self.key.keys())
-
-    def get_key(self):
-        """
-        Strings everything together.
-        Unfortunatly it does not work.
-        """
-        while len(self.key.keys()) < 26:
-
-            word   = self.choose_word()
-
-            if word == "": break  # no more words with unknown chars
-
-            regex  = self.translate_and_regex(word)
-            cipher = self.match_ciphertext(regex)
-
-            if cipher == "": continue
-            self.extract_unknown(word, regex, cipher)
-
-        return self.key_to_str()
-    ## end Breaker
 
 if __name__ == "__main__":
-    import sys, os
     import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('FILE')
+    args = parser.parse_args()
+
+    t = ''.join([x for x in open(args.FILE, "r").read().lower() if x.isalpha()])
+    import os
+    words = open(os.path.abspath(os.path.dirname(__file__))+"/common.txt", "r").read().split('\n')
+
+    occurance_string = 'etaoinsrhdlucmfywgpbvkxqjz'
+
+    occur_count = {chr(x+0x61): 0 for x in range(26)}
+    for char in t:
+        occur_count[char]+=1
+    mapping = {}
+    sorted_occur = sorted(occur_count, key=occur_count.__getitem__, reverse=True)
+    for i in range(26):
+        mapping[occurance_string[i]]=sorted_occur[i]
+    key = ''.join([x for _, x in sorted(mapping.items())])
+
+    import re
+    import mono
+
+    best_score = 0
+    best_key = key
+
+    trys = 0
+    while trys < 1000:
+        score = 0
+        tmp = randomize_key(key)
+        plain = mono.mono_decrypt(t, tmp)
+        for word in words:
+            if re.search(word, plain):
+                score += 1
+        if score > best_score:
+            trys = 0
+            best_score = score 
+            best_key = key
+            key=tmp
+            print(best_score)
+        else:
+            trys+=1
 
-    # cannot import from a parent package if called directly
-    # without modifying PYTHONPATH or sys.path
-    file_dir = os.path.dirname(os.path.abspath(__file__))
-    file_parent_dir = os.path.dirname(file_dir)
-    sys.path.append(file_parent_dir)
-
-    from libex01 import read_text
-
-    def parse_args(sys_argv):
-        parser = argparse.ArgumentParser()
-        parser.add_argument("FILE")
-        return parser.parse_args(sys_argv[1:])
-
-
-    args = parse_args(sys.argv)
-    txt = read_text(args.FILE)
-    word_file = "common.txt"
-
-    bm = Breaker(txt, word_file)
-
-    print(bm.get_key())
+    print(best_key)
+    print()
+    print(mono.mono_decrypt(t, best_key))