From 5355e25ba37c4d229b8e96d0819c1517b69c3f3c Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 13:12:25 +0100
Subject: [PATCH 1/7] =?UTF-8?q?F=C3=BCgt=20aktuellen=20Zustand=20von=20bre?=
 =?UTF-8?q?ak=5Fmono.py=20hinzu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 src/mono/break_mono.py

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
new file mode 100644
index 0000000..ddc7995
--- /dev/null
+++ b/src/mono/break_mono.py
@@ -0,0 +1,117 @@
+"""
+Python module to derive a key from an monoalphabetically encrypted file.
+"""
+import itertools as it
+import re
+
+from collections import OrderedDict
+from collections import Counter
+from string import ascii_lowercase
+
+#from libex01 import read_text
+
+def mono_break(enc_txt: str):
+
+    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
+
+    def get_frequency(text):
+        freq = Counter(text)
+        # Counter with lowercase ascii letters all having a count of 0
+        missing = Counter(dict(it.product(ascii_lowercase, [0])))
+        freq.update(missing)
+        return freq
+
+    def derive_alphabet(freq: Counter):
+        return OrderedDict(zip(list(freq.keys()), EN_LETTER_FREQ))
+
+    subs = derive_alphabet(get_frequency(enc_txt))
+    dec = ""
+    for char in enc_txt:
+        dec += subs[char]
+    return dec
+
+## Frequenzanalyse
+EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
+
+def get_frequency(text):
+    freq = Counter(text)
+    # Counter with lowercase ascii letters all having a count of 0
+    missing = Counter(dict(it.product(ascii_lowercase, [0])))
+    freq.update(missing)
+    return freq
+
+def derive_alphabet_freq(freq: Counter):
+    most_freq = [ item[0] for item in freq.most_common() ]
+    #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+    return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+
+## Mustersuche
+def next_char_anchor(text: str, char):
+    """
+    Generator that takes the text and a char and yields positions of that char.
+    Adjust starting position by slicing.
+    Return generator closure.
+    """
+    index = text.find(char)
+    while index != -1:
+        yield (index, char)
+        index = text.find(char, index + 1)
+    return (index, char)
+
+
+def match_word(text: str, word: str, pos_iter):
+    """
+    Align with anchor and check hypothesis.
+    First hypothesis is the frequency analysis.
+    Align `word' with `text' for each anchor.
+    For remaining anchors check the occurence of pattern
+    and match with word. If it is true more than once, save it as a
+    new hypothesis.
+    """
+    pass
+
+def next_match(text: str, word: str, alphabet: dict, pos_iter: generator):
+    pass
+
+class Breaker():
+
+    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
+
+    def __init__(self, ciphertext, word_file):
+        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
+        self.word_file = None  # TODO
+
+    def get_key(self):
+
+        def get_word_containing(char_list: list):
+            """
+            Find word from a word list file (common.txt) containing the chars
+            in `char_list'.
+            Return None it no word matches or
+            TUPLE(word, pos) where `pos' is a LIST of matching positions.
+            """
+            with open(self.word_file, 'r') as f:
+                for word in f:
+                    pos = []
+                    for char in char_list:
+                        pos += word.find(char)
+                    if -1 not in pos:
+                        return word, pos
+            return None
+
+        most_freq = next(iter(self.alph))  # most frequent char
+        word_having_char, pos = get_word_containing(most_freq)  # unpack
+
+
+
+        pass
+
+
+
+text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"
+
+
+freq = get_frequency(text)
+alph = derive_alphabet(freq)
+
+print(alph.values())

From cde1cadba1c35afc5804c3fbeb7d6d97b5f12169 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 13:28:00 +0100
Subject: [PATCH 2/7] =?UTF-8?q?R=C3=A4umt=20break=5Fmono.py=20auf?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 104 ++++++++++++++-----------------------------------
 1 file changed, 29 insertions(+), 75 deletions(-)

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
index ddc7995..58f42c0 100644
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -8,12 +8,13 @@ from collections import OrderedDict
 from collections import Counter
 from string import ascii_lowercase
 
-#from libex01 import read_text
 
-def mono_break(enc_txt: str):
+class Breaker():
 
+    ## frequency analysis
     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
+    @staticmethod
     def get_frequency(text):
         freq = Counter(text)
         # Counter with lowercase ascii letters all having a count of 0
@@ -21,88 +22,41 @@ def mono_break(enc_txt: str):
         freq.update(missing)
         return freq
 
-    def derive_alphabet(freq: Counter):
-        return OrderedDict(zip(list(freq.keys()), EN_LETTER_FREQ))
-
-    subs = derive_alphabet(get_frequency(enc_txt))
-    dec = ""
-    for char in enc_txt:
-        dec += subs[char]
-    return dec
-
-## Frequenzanalyse
-EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
-
-def get_frequency(text):
-    freq = Counter(text)
-    # Counter with lowercase ascii letters all having a count of 0
-    missing = Counter(dict(it.product(ascii_lowercase, [0])))
-    freq.update(missing)
-    return freq
-
-def derive_alphabet_freq(freq: Counter):
-    most_freq = [ item[0] for item in freq.most_common() ]
-    #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
-    return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
-
-## Mustersuche
-def next_char_anchor(text: str, char):
-    """
-    Generator that takes the text and a char and yields positions of that char.
-    Adjust starting position by slicing.
-    Return generator closure.
-    """
-    index = text.find(char)
-    while index != -1:
-        yield (index, char)
-        index = text.find(char, index + 1)
-    return (index, char)
-
-
-def match_word(text: str, word: str, pos_iter):
-    """
-    Align with anchor and check hypothesis.
-    First hypothesis is the frequency analysis.
-    Align `word' with `text' for each anchor.
-    For remaining anchors check the occurence of pattern
-    and match with word. If it is true more than once, save it as a
-    new hypothesis.
-    """
-    pass
-
-def next_match(text: str, word: str, alphabet: dict, pos_iter: generator):
-    pass
 
-class Breaker():
+    @staticmethod
+    def derive_alphabet_freq(freq: Counter):
+        most_freq = [ item[0] for item in freq.most_common() ]
+        #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+        return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+
+
+    ## pattern matching
+    @staticmethod
+    def get_word_containing(word_file, char_list: list):
+        """
+        Find word from a word list file (common.txt) containing the chars
+        in `char_list'.
+        Return None it no word matches or
+        TUPLE(word, pos) where `pos' is a LIST of matching positions.
+        """
+        with open(word_file, 'r') as f:
+            for word in f:
+                pos = []
+                for char in char_list:
+                    pos += word.find(char)
+                if -1 not in pos:
+                    return word, pos
+        return None
 
-    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
     def __init__(self, ciphertext, word_file):
         self.alph = derive_alphabet_freq(get_frequency(ciphertext))
         self.word_file = None  # TODO
 
-    def get_key(self):
-
-        def get_word_containing(char_list: list):
-            """
-            Find word from a word list file (common.txt) containing the chars
-            in `char_list'.
-            Return None it no word matches or
-            TUPLE(word, pos) where `pos' is a LIST of matching positions.
-            """
-            with open(self.word_file, 'r') as f:
-                for word in f:
-                    pos = []
-                    for char in char_list:
-                        pos += word.find(char)
-                    if -1 not in pos:
-                        return word, pos
-            return None
 
+    def get_key(self):
         most_freq = next(iter(self.alph))  # most frequent char
-        word_having_char, pos = get_word_containing(most_freq)  # unpack
-
-
+        word_having_char, pos = get_word_containing(self.word_file, most_freq)
 
         pass
 

From e3f854fa0c6581bb0402fe1fc0a1f0c98476e276 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 19:18:14 +0100
Subject: [PATCH 3/7] =?UTF-8?q?f=C3=BCgt=20match=5Fciphertext=20in=20break?=
 =?UTF-8?q?=5Fmono=20hinzu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 63 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
index 58f42c0..a362a8f 100644
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 """
 Python module to derive a key from an monoalphabetically encrypted file.
 """
@@ -27,7 +29,7 @@ class Breaker():
     def derive_alphabet_freq(freq: Counter):
         most_freq = [ item[0] for item in freq.most_common() ]
         #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
-        return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+        return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq))
 
 
     ## pattern matching
@@ -40,32 +42,65 @@ class Breaker():
         TUPLE(word, pos) where `pos' is a LIST of matching positions.
         """
         with open(word_file, 'r') as f:
-            for word in f:
+            for line in f:
+                word = line[:-1]
                 pos = []
                 for char in char_list:
-                    pos += word.find(char)
+                    pos.append(word.find(char))
                 if -1 not in pos:
                     return word, pos
-        return None
+        return None, None
+
+    @staticmethod
+    def positions(text: str, sub):
+        index = text.find(sub)
+        while index != -1:
+            yield index
+            index = text.find(sub, index + 1)
+        return index
 
 
+    @staticmethod
+    def match_ciphertext(text: str, word_pos: tuple, char: tuple):
+        word, wposl = word_pos
+        wpos = wposl[0]
+        wlen = len(word)
+
+        snip_count = Counter()
+        for pos in Breaker.positions(text, char):
+            word_begin = pos - wpos
+            snippet = text[word_begin : word_begin + wlen]
+
+            if snippet not in snip_count.elements():
+                snip_count[snippet] = text.count(snippet)
+
+        return snip_count.most_common(1)[0][0]
+
     def __init__(self, ciphertext, word_file):
-        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
-        self.word_file = None  # TODO
+        self.ciphertext = ciphertext
+        self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
+        self.word_file = word_file
 
 
     def get_key(self):
-        most_freq = next(iter(self.alph))  # most frequent char
-        word_having_char, pos = get_word_containing(self.word_file, most_freq)
-
-        pass
 
 
 
-text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"
+        # most frequent char in English and corresponding most common char in text
+        #most_freq = self.alph.popitem(last=False)
+        most_freq = next(iter(self.alph))
+        word_pos = Breaker.get_word_containing(
+            self.word_file,
+            #most_freq[0]
+            most_freq
+        )
+        most_common = Breaker.match_ciphertext(
+            self.ciphertext,
+            word_pos,
+            most_freq
+        )
 
+        print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common)
 
-freq = get_frequency(text)
-alph = derive_alphabet(freq)
 
-print(alph.values())
+    ## end Breaker

From 3331177041c77c990b0d345d84e130d0519dcba1 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 21:41:23 +0100
Subject: [PATCH 4/7] =?UTF-8?q?f=C3=BCgt=20choose=5Fknown=5Fletters=20hinz?=
 =?UTF-8?q?u.=20Vor=20rewrite=20match=5Fciphertext?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 51 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
index a362a8f..a1f94c2 100644
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -62,6 +62,9 @@ class Breaker():
 
     @staticmethod
     def match_ciphertext(text: str, word_pos: tuple, char: tuple):
+        """
+        asdf
+        """
         word, wposl = word_pos
         wpos = wposl[0]
         wlen = len(word)
@@ -76,6 +79,19 @@ class Breaker():
 
         return snip_count.most_common(1)[0][0]
 
+    @staticmethod
+    def choose_known_letters(key_alphabet):
+        letters = list(key_alphabet.keys())
+        if len(key_alphabet) < 3:
+            yield letters
+        else:
+            for i in letters:
+                for j in letters:
+                    for k in letters:
+                        if k == j or k == i or j == i: continue
+                        yield [i, j, k]
+        return None
+
     def __init__(self, ciphertext, word_file):
         self.ciphertext = ciphertext
         self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
@@ -84,23 +100,28 @@ class Breaker():
 
     def get_key(self):
 
-
+        key_alphabet = OrderedDict()
 
         # most frequent char in English and corresponding most common char in text
-        #most_freq = self.alph.popitem(last=False)
-        most_freq = next(iter(self.alph))
-        word_pos = Breaker.get_word_containing(
-            self.word_file,
-            #most_freq[0]
-            most_freq
-        )
-        most_common = Breaker.match_ciphertext(
-            self.ciphertext,
-            word_pos,
-            most_freq
-        )
-
-        print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common)
+        most_freq = self.alph[Breaker.EN_LETTER_FREQ]
+
+        key_alphabet[Breaker.EN_LETTER_FREQ[0]] = most_freq
+
+        while len(key_alphabet) < 26:
+
+            word_pos = Breaker.get_word_containing(
+                self.word_file,
+                next(Breaker.choose_known_letters(key_alphabet))
+            )
+
+            pass
+
+            most_common = Breaker.match_ciphertext(
+                self.ciphertext,
+                word_pos,
+                most_freq
+            )
 
+        pass
 
     ## end Breaker

From 12cca20112701e9f7e101c319e52b15fe562b150 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 23:52:14 +0100
Subject: [PATCH 5/7] break_mono.py wird komplett umgeschrieben

---
 src/mono/break_mono.py | 161 +++++++++++++++++++------------------------------
 1 file changed, 61 insertions(+), 100 deletions(-)

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
index a1f94c2..5cb2840 100644
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -3,125 +3,86 @@
 """
 Python module to derive a key from an monoalphabetically encrypted file.
 """
-import itertools as it
+
 import re
 
-from collections import OrderedDict
 from collections import Counter
-from string import ascii_lowercase
+
 
 
 class Breaker():
 
-    ## frequency analysis
-    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
-    @staticmethod
-    def get_frequency(text):
-        freq = Counter(text)
-        # Counter with lowercase ascii letters all having a count of 0
-        missing = Counter(dict(it.product(ascii_lowercase, [0])))
-        freq.update(missing)
-        return freq
-
-
-    @staticmethod
-    def derive_alphabet_freq(freq: Counter):
-        most_freq = [ item[0] for item in freq.most_common() ]
-        #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
-        return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq))
-
-
-    ## pattern matching
-    @staticmethod
-    def get_word_containing(word_file, char_list: list):
-        """
-        Find word from a word list file (common.txt) containing the chars
-        in `char_list'.
-        Return None it no word matches or
-        TUPLE(word, pos) where `pos' is a LIST of matching positions.
-        """
-        with open(word_file, 'r') as f:
-            for line in f:
-                word = line[:-1]
-                pos = []
-                for char in char_list:
-                    pos.append(word.find(char))
-                if -1 not in pos:
-                    return word, pos
-        return None, None
-
-    @staticmethod
-    def positions(text: str, sub):
-        index = text.find(sub)
-        while index != -1:
-            yield index
-            index = text.find(sub, index + 1)
-        return index
-
-
-    @staticmethod
-    def match_ciphertext(text: str, word_pos: tuple, char: tuple):
-        """
-        asdf
-        """
-        word, wposl = word_pos
-        wpos = wposl[0]
-        wlen = len(word)
-
-        snip_count = Counter()
-        for pos in Breaker.positions(text, char):
-            word_begin = pos - wpos
-            snippet = text[word_begin : word_begin + wlen]
-
-            if snippet not in snip_count.elements():
-                snip_count[snippet] = text.count(snippet)
-
-        return snip_count.most_common(1)[0][0]
-
-    @staticmethod
-    def choose_known_letters(key_alphabet):
-        letters = list(key_alphabet.keys())
-        if len(key_alphabet) < 3:
-            yield letters
-        else:
-            for i in letters:
-                for j in letters:
-                    for k in letters:
-                        if k == j or k == i or j == i: continue
-                        yield [i, j, k]
-        return None
+    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
     def __init__(self, ciphertext, word_file):
-        self.ciphertext = ciphertext
-        self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
-        self.word_file = word_file
 
+        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
 
-    def get_key(self):
+        # excuse me:
+        words = []
+        with open(word_file, 'r') as wf:
+            for line in wf:
+                words.append(line[:-1])  # remove trailing newline and append
+
+        self.text = ciphertext
+        self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
+        self.words = words
+
+    def choose_word(self):
+        known_chars = self.key.keys()
+        for i in range(len(self.words)):
+            word = self.words[i]
 
-        key_alphabet = OrderedDict()
+            if len(word) == 0: continue
 
-        # most frequent char in English and corresponding most common char in text
-        most_freq = self.alph[Breaker.EN_LETTER_FREQ]
+            # count known chars in word
+            n = 0
+            for char in word:
+                if char in known_chars:
+                    n +=1
 
-        key_alphabet[Breaker.EN_LETTER_FREQ[0]] = most_freq
+            # skip known words or words with too many unknown
+            if n == len(word) or n / len(word) < 0.3:
+                continue
 
-        while len(key_alphabet) < 26:
+            return self.words[i]
 
-            word_pos = Breaker.get_word_containing(
-                self.word_file,
-                next(Breaker.choose_known_letters(key_alphabet))
-            )
+    def translate_and_regex(self, word: str):
+        regex = ""
+        for char in word:
+            if char in self.key.keys():
+                regex = regex + self.key[char]
+            else:
+                regex = regex + "."
+        return regex
+
+    def match_ciphertext(self, regex):
+        rx = re.compile(regex)
+        count = Counter(re.findall(self.text))
+        return count.most_common(1)[0][0]
+
+    def extract_unknown(self, plain, regex, cipher):
+        assert len(plain) == len(regex) == len(cipher)
+
+        for i in range(len(regex)):
+            if regex[i] != ".": continue
+            self.key.update({ plain[i] : cipher[i] })
+
+        return self.key
+
+    def key_to_str(self):
+        return str(self.key.keys())
+
+    def get_key(self):
 
-            pass
+        while len(self.key.keys()) < 26:
 
-            most_common = Breaker.match_ciphertext(
-                self.ciphertext,
-                word_pos,
-                most_freq
-            )
+            word   = self.choose_word()
+            regex  = self.translate_and_regex(word)
+            cipher = self.match_ciphertext(regex)
+            print(word, regex, cipher, self.extract_unknown())
 
-        pass
+        return self.key_to_str()
 
     ## end Breaker

From afb6faabbc486833a4ebbd091d7abb2f29712df2 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Wed, 25 Nov 2020 02:15:30 +0100
Subject: [PATCH 6/7] =?UTF-8?q?f=C3=BCgt=20diverse=20Bugfixes=20hinzu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 57 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
index 5cb2840..e8b7aaa 100644
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -7,34 +7,45 @@ Python module to derive a key from an monoalphabetically encrypted file.
 import re
 
 from collections import Counter
+#from string import ascii_lowercase
 
-
+import pdb
 
 class Breaker():
 
-
     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
-    def __init__(self, ciphertext, word_file):
-
-        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
-
-        # excuse me:
+    @staticmethod
+    def read_word_file(word_file):
+        # excuse me
         words = []
         with open(word_file, 'r') as wf:
             for line in wf:
-                words.append(line[:-1])  # remove trailing newline and append
+                word = line[:-1] # remove trailing newline
+                word = word.lower()
+
+                if word == "" or not word.isalpha():
+                    continue
+
+                words.append(word)
+        return words
+
+    def __init__(self, ciphertext, word_file):
+
+        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
 
         self.text = ciphertext
         self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
-        self.words = words
+        self.words = Breaker.read_word_file(word_file)
 
     def choose_word(self):
         known_chars = self.key.keys()
         for i in range(len(self.words)):
             word = self.words[i]
 
-            if len(word) == 0: continue
+            if len(word) == 0:
+                self.words.pop(i)
+                continue
 
             # count known chars in word
             n = 0
@@ -42,11 +53,15 @@ class Breaker():
                 if char in known_chars:
                     n +=1
 
-            # skip known words or words with too many unknown
-            if n == len(word) or n / len(word) < 0.3:
+            # remove known words
+            if n == len(word):
+                self.words.pop(i)
                 continue
 
-            return self.words[i]
+            # skip words with too many unknown chars
+            if (n / len(word) < 0.3): continue
+
+            return self.words.pop(i)
 
     def translate_and_regex(self, word: str):
         regex = ""
@@ -58,12 +73,16 @@ class Breaker():
         return regex
 
     def match_ciphertext(self, regex):
-        rx = re.compile(regex)
-        count = Counter(re.findall(self.text))
-        return count.most_common(1)[0][0]
+        regexc = re.compile(regex)
+        count = Counter(regexc.findall(self.text))
+        if len(count) == 0:
+            return ""
+        else:
+            return count.most_common(1)[0][0]
 
     def extract_unknown(self, plain, regex, cipher):
         assert len(plain) == len(regex) == len(cipher)
+        assert "." in regex
 
         for i in range(len(regex)):
             if regex[i] != ".": continue
@@ -81,7 +100,11 @@ class Breaker():
             word   = self.choose_word()
             regex  = self.translate_and_regex(word)
             cipher = self.match_ciphertext(regex)
-            print(word, regex, cipher, self.extract_unknown())
+
+            if cipher == "": continue
+
+            print(word, regex, cipher,
+                  self.extract_unknown(word, regex, cipher))
 
         return self.key_to_str()
 

From ed3d03d599962bf7e09b0cd3e29a28c8eac68ce8 Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Wed, 25 Nov 2020 03:55:50 +0100
Subject: [PATCH 7/7] =?UTF-8?q?F=C3=BCgt=20Dokumentation=20hinzu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 91 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 75 insertions(+), 16 deletions(-)
 mode change 100644 => 100755 src/mono/break_mono.py

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
old mode 100644
new mode 100755
index e8b7aaa..4bb44a1
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -2,22 +2,27 @@
 
 """
 Python module to derive a key from an monoalphabetically encrypted file.
+Does not work yet.
 """
 
 import re
-
 from collections import Counter
-#from string import ascii_lowercase
-
-import pdb
 
 class Breaker():
+    """
+    A handle on the various bits of data needed to derive the key from the ciphertext.
+    """
 
     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
 
     @staticmethod
     def read_word_file(word_file):
-        # excuse me
+        """
+        Helper function to read the words file into memory. The rationale was that
+        querying would be faster and it would be possible to delete used words
+        without mutating the file.
+        """
+
         words = []
         with open(word_file, 'r') as wf:
             for line in wf:
@@ -32,6 +37,7 @@ class Breaker():
 
     def __init__(self, ciphertext, word_file):
 
+        # count chars in the ciphertext
         most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
 
         self.text = ciphertext
@@ -39,12 +45,18 @@ class Breaker():
         self.words = Breaker.read_word_file(word_file)
 
     def choose_word(self):
+        """
+        Iterate through the word list and pick a word for pattern matching.
+        Words whose chars are all already known are removed from the list.
+        Words where less than a third of the chars are known are skipped.
+        """
         known_chars = self.key.keys()
-        for i in range(len(self.words)):
-            word = self.words[i]
+        for word in list(self.words):  # snapshot: the loop removes from self.words
+
+            word = word + ""  # copy
 
             if len(word) == 0:
-                self.words.pop(i)
+                self.words.remove(word)
                 continue
 
             # count known chars in word
@@ -55,15 +67,22 @@ class Breaker():
 
             # remove known words
             if n == len(word):
-                self.words.pop(i)
+                self.words.remove(word)
                 continue
 
             # skip words with too many unknown chars
             if (n / len(word) < 0.3): continue
 
-            return self.words.pop(i)
+            self.words.remove(word)
+            return word
+
+        return ""
 
     def translate_and_regex(self, word: str):
+        """
+        Prepare chosen word for pattern matching.
+        Translate the known characters and replace the others with a regex '.'
+        """
         regex = ""
         for char in word:
             if char in self.key.keys():
@@ -73,6 +92,11 @@ class Breaker():
         return regex
 
     def match_ciphertext(self, regex):
+        """
+        Compile the translated chosen word to a regular expression and find all
+        matches inside the ciphertext. Count the occurrences and pick the most
+        frequent one.
+        """
         regexc = re.compile(regex)
         count = Counter(regexc.findall(self.text))
         if len(count) == 0:
@@ -81,6 +105,12 @@ class Breaker():
             return count.most_common(1)[0][0]
 
     def extract_unknown(self, plain, regex, cipher):
+        """
+        Compare the chosen word in its various forms to infer which
+        new characters may be added to the alphabet map.
+        The dots inside `regex' symbolize the positions of unknown characters
+        and provide a mapping between `plain' and `cipher' text.
+        """
         assert len(plain) == len(regex) == len(cipher)
         assert "." in regex
 
@@ -91,21 +121,50 @@ class Breaker():
         return self.key
 
     def key_to_str(self):
-        return str(self.key.keys())
+        return "".join(self.key.keys())
 
     def get_key(self):
-
+        """
+        Strings everything together.
+        Unfortunately it does not work.
+        """
         while len(self.key.keys()) < 26:
 
             word   = self.choose_word()
+
+            if word == "": break  # no more words with unknown chars
+
             regex  = self.translate_and_regex(word)
             cipher = self.match_ciphertext(regex)
 
             if cipher == "": continue
-
-            print(word, regex, cipher,
-                  self.extract_unknown(word, regex, cipher))
+            self.extract_unknown(word, regex, cipher)
 
         return self.key_to_str()
-
     ## end Breaker
+
+if __name__ == "__main__":
+    import sys, os
+    import argparse
+
+    # cannot import from a parent package if called directly
+    # without modifying PYTHONPATH or sys.path
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    file_parent_dir = os.path.dirname(file_dir)
+    sys.path.append(file_parent_dir)
+
+    from libex01 import read_text
+
+    def parse_args(sys_argv):
+        parser = argparse.ArgumentParser()
+        parser.add_argument("FILE")
+        return parser.parse_args(sys_argv[1:])
+
+
+    args = parse_args(sys.argv)
+    txt = read_text(args.FILE)
+    word_file = "common.txt"
+
+    bm = Breaker(txt, word_file)
+
+    print(bm.get_key())