Fügt aktuellen Zustand von break_mono.py hinzu
							parent
							
								
									98a78d3d35
								
							
						
					
					
						commit
						5355e25ba3
					
				| @ -0,0 +1,117 @@ | |||||||
|  | """ | ||||||
|  | Python module to derive a key from an monoalphabetically encrypted file. | ||||||
|  | """ | ||||||
|  | import itertools as it | ||||||
|  | import re | ||||||
|  | 
 | ||||||
|  | from collections import OrderedDict | ||||||
|  | from collections import Counter | ||||||
|  | from string import ascii_lowercase | ||||||
|  | 
 | ||||||
|  | #from libex01 import read_text | ||||||
|  | 
 | ||||||
def mono_break(enc_txt: str):
    """Decrypt a monoalphabetically encrypted text via frequency analysis.

    Cipher letters are ranked by how often they occur in `enc_txt' and
    mapped onto the English letters in descending frequency order
    ('e' is the most common English letter).

    :param enc_txt: ciphertext made of lowercase ascii letters
    :return: the decrypted text as a string
    """

    # English letters ordered by descending frequency in typical text.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def get_frequency(text):
        """Count each char; ensure all 26 lowercase letters are present."""
        freq = Counter(text)
        # Counter with lowercase ascii letters all having a count of 0
        missing = Counter(dict(it.product(ascii_lowercase, [0])))
        freq.update(missing)
        return freq

    def derive_alphabet(freq: Counter):
        """Map cipher letters, most frequent first, onto EN_LETTER_FREQ.

        Bug fix: the previous version zipped ``freq.keys()`` — insertion
        (first-seen) order, not frequency order — with EN_LETTER_FREQ, so
        the substitution table was wrong whenever the first occurrence
        order differed from the frequency ranking.
        """
        ranked = (char for char, _ in freq.most_common())
        return OrderedDict(zip(ranked, EN_LETTER_FREQ))

    subs = derive_alphabet(get_frequency(enc_txt))
    # str.join is linear; repeated "dec += ..." would be quadratic.
    return "".join(subs[char] for char in enc_txt)
|  | 
 | ||||||
## Frequency analysis
# English letters ordered by descending frequency in typical English text;
# used to pair ranked cipher letters with their likely plaintext letters.
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|  | 
 | ||||||
def get_frequency(text):
    """Return a Counter over `text' that contains every lowercase ascii
    letter, where letters absent from the text get a count of 0."""
    counts = Counter(text)
    # Zero-count entries guarantee all 26 letters appear as keys.
    zeroes = Counter({letter: 0 for letter in ascii_lowercase})
    counts.update(zeroes)
    return counts
|  | 
 | ||||||
def derive_alphabet_freq(freq: Counter):
    """Build the substitution table for the frequency hypothesis:
    plaintext letters (most frequent English first) -> cipher letters
    ranked by their count in `freq'."""
    ranked_cipher = (pair[0] for pair in freq.most_common())
    return OrderedDict(zip(EN_LETTER_FREQ, ranked_cipher))
|  | 
 | ||||||
## Pattern search
def next_char_anchor(text: str, char):
    """
    Yield ``(position, char)`` for every occurrence of `char' in `text'.

    The search is substring-based (``str.find``), so `char' may also be a
    longer pattern.  The final ``(-1, char)`` becomes the generator's
    ``StopIteration`` value.  Adjust the starting position by slicing.
    """
    pos = text.find(char)
    while pos >= 0:
        yield (pos, char)
        pos = text.find(char, pos + 1)
    return (pos, char)
|  | 
 | ||||||
|  | 
 | ||||||
def match_word(text: str, word: str, pos_iter):
    """
    Align with anchor and check hypothesis.
    First hypothesis is the frequency analysis.
    Align `word' with `text' for each anchor supplied by `pos_iter'.
    For remaining anchors check the occurrence of the pattern
    and match it with `word'. If it matches more than once, save it as a
    new hypothesis.
    """
    # TODO: not yet implemented.
    pass
|  | 
 | ||||||
def next_match(text: str, word: str, alphabet: dict, pos_iter):
    """Advance to the next candidate match of `word' in `text'.

    Bug fix: the parameter annotation ``pos_iter: generator`` referenced
    the undefined name ``generator`` and raised ``NameError`` as soon as
    the module was imported.  The annotation is dropped; `pos_iter' is
    expected to be an iterator of anchor positions.
    """
    # TODO: not yet implemented.
    pass
|  | 
 | ||||||
class Breaker():
    """Derive the key of a monoalphabetic cipher from a ciphertext and a
    word-list file, starting from a frequency-analysis hypothesis."""

    # English letters ordered by descending frequency in typical text.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def __init__(self, ciphertext, word_file):
        """
        :param ciphertext: the encrypted text to analyse
        :param word_file: path to a word-list file (one word per line)
        """
        # Initial hypothesis: map plaintext letters to cipher letters
        # purely by frequency rank.
        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
        # Bug fix: the parameter was ignored (attribute left None), which
        # made open() in get_word_containing() crash with TypeError.
        self.word_file = word_file

    def get_key(self):
        """Refine the substitution alphabet by matching word-list words.

        Only the first step (finding a word containing the most frequent
        plaintext letter) is implemented so far; returns None.
        """

        def get_word_containing(char_list: list):
            """
            Find a word from the word-list file containing all the chars
            in `char_list'.
            Return None if no word matches, otherwise
            TUPLE(word, pos) where `pos' is a LIST of matching positions.
            """
            with open(self.word_file, 'r') as f:
                for word in f:
                    word = word.strip()  # drop the trailing newline
                    # Bug fix: the original did `pos += word.find(char)`,
                    # which concatenates an int to a list and raises
                    # TypeError; collect the positions in a list instead.
                    pos = [word.find(char) for char in char_list]
                    if -1 not in pos:
                        return word, pos
            return None

        most_freq = next(iter(self.alph))  # most frequent plaintext char
        result = get_word_containing(most_freq)
        # Bug fix: guard the no-match case; unpacking None raised TypeError.
        if result is None:
            return None
        word_having_char, pos = result

        # TODO: continue aligning anchors and refining the hypothesis.
        return None
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
## Demo: run the frequency analysis on the sample ciphertext.
freq = get_frequency(text)
# Bug fix: `derive_alphabet` only exists as a local inside mono_break(),
# so calling it here raised NameError; use the module-level
# derive_alphabet_freq() instead.
alph = derive_alphabet_freq(freq)

print(alph.values())
					Loading…
					
					
				
		Reference in New Issue