From 5355e25ba37c4d229b8e96d0819c1517b69c3f3c Mon Sep 17 00:00:00 2001
From: Daniel Tschertkow <daniel.tschertkow@posteo.de>
Date: Tue, 24 Nov 2020 13:12:25 +0100
Subject: [PATCH] =?UTF-8?q?F=C3=BCgt=20aktuellen=20Zustand=20von=20break?=
 =?UTF-8?q?=5Fmono.py=20hinzu?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/mono/break_mono.py | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 src/mono/break_mono.py

diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py
new file mode 100644
index 0000000..ddc7995
--- /dev/null
+++ b/src/mono/break_mono.py
@@ -0,0 +1,117 @@
+"""
+Python module to derive a key from a monoalphabetically encrypted file.
+"""
+import itertools as it
+import re
+
+from collections import OrderedDict
+from collections import Counter
+from string import ascii_lowercase
+
+#from libex01 import read_text
+
+def mono_break(enc_txt: str):
+    """
+    Guess the plaintext of `enc_txt' by pure letter-frequency analysis.
+
+    The n-th most frequent lowercase ASCII letter of the ciphertext is
+    replaced by the n-th most frequent letter of English. Any other
+    character (space, digit, punctuation, ...) is copied unchanged
+    instead of raising KeyError. Return the guess as a str.
+    """
+    EN_LETTER_FREQ = "etaoinsrhdlucmfywgpbvkxqjz"
+
+    # Seed every letter with 0 so letters absent from the text still get
+    # a partner. most_common() sorts by descending count (ties keep the
+    # alphabetical seeding order); the old code zipped freq.keys(), i.e.
+    # first-appearance order, and so ignored the frequencies entirely.
+    freq = Counter(dict.fromkeys(ascii_lowercase, 0))
+    freq.update(char for char in enc_txt if char in freq)
+    by_rank = "".join(char for char, _ in freq.most_common())
+    return enc_txt.translate(str.maketrans(by_rank, EN_LETTER_FREQ))
+
+## Frequency analysis (letters below: presumably most frequent first)
+EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
+
+def get_frequency(text):
+    """Return a Counter of `text' with unseen a-z letters added as 0."""
+    counts = Counter(text)
+    for letter in ascii_lowercase:  # existing counts stay untouched
+        counts.setdefault(letter, 0)
+    return counts
+
+def derive_alphabet_freq(freq: Counter):
+    """Map each EN_LETTER_FREQ letter to the `freq' key of equal rank."""
+    ranked = (char for char, _count in freq.most_common())
+    return OrderedDict(zip(EN_LETTER_FREQ, ranked))
+
+## Pattern search
+def next_char_anchor(text: str, char):
+    """
+    Yield (index, char) for each hit of `text.find(char)', ascending.
+    Slice `text' to start later; StopIteration.value is (-1, char).
+    """
+    pos = -1
+    while True:
+        pos = text.find(char, pos + 1)
+        if pos == -1:
+            return (pos, char)
+        yield (pos, char)
+
+
+def match_word(text: str, word: str, pos_iter):
+    """
+    Stub: the body is only `pass', so None is returned. Intended logic:
+    align `word' with `text' at each anchor and check the hypothesis
+    (the first hypothesis is the frequency analysis). For the remaining
+    anchors, check the occurrence of the pattern and match it with
+    `word'. If that holds more than once, save it as a new hypothesis.
+    NOTE(review): anchors presumably come from `pos_iter' -- confirm.
+    """
+    pass
+
+def next_match(text: str, word: str, alphabet: dict, pos_iter):
+    """Not implemented yet; always returns None."""
+
+class Breaker():
+    """
+    Unfinished key breaker for a monoalphabetic substitution cipher.
+    `alph': OrderedDict mapping the letters of EN_LETTER_FREQ, in order,
+    to the ciphertext characters of equal frequency rank.
+    `word_file': path of the word list file read by get_key().
+    """
+    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
+
+    def __init__(self, ciphertext, word_file):
+        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
+        self.word_file = word_file  # was None, so open() in get_key failed
+
+    def _get_word_containing(self, char_list):
+        """
+        Return (word, pos) for the first word-list line containing every
+        char of `char_list'; pos[i] is where char_list[i] was found.
+        Return None if no word matches.
+        """
+        with open(self.word_file, 'r') as f:
+            for line in f:
+                word = line.rstrip("\n")  # line break is not part of the word
+                pos = [word.find(char) for char in char_list]
+                if -1 not in pos:
+                    return word, pos
+        return None
+
+    def get_key(self):
+        """Unfinished: looks up an anchor word only; always returns None."""
+        most_freq = next(iter(self.alph))  # first key of `alph'
+        found = self._get_word_containing(most_freq)  # (word, pos) or None
+        return None  # TODO: derive the key from `found'
+
+
+
+# Sample input (presumably the ciphertext to break) for the analysis below.
+text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"
+
+freq = get_frequency(text)
+alph = derive_alphabet_freq(freq)  # derive_alphabet exists only in mono_break
+# Show the characters of `text' ordered from most to least frequent.
+print(alph.values())