"""
Python module to derive a key from a monoalphabetically encrypted file.
"""
# Provenance: reconstructed from a git format-patch that created
# src/mono/break_mono.py
#   commit:  5355e25ba37c4d229b8e96d0819c1517b69c3f3c
#   author:  Daniel Tschertkow
#   date:    Tue, 24 Nov 2020 13:12:25 +0100
#   subject: [PATCH] "Adds the current state of break_mono.py"
import itertools as it
import re

from collections import OrderedDict
from collections import Counter
from string import ascii_lowercase
from typing import Iterator, Optional, Tuple

#from libex01 import read_text


# --- Frequency analysis ---------------------------------------------------

# English letters ordered from most to least frequent.
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")


def get_frequency(text) -> Counter:
    """Count how often every symbol occurs in `text`.

    Every lowercase ASCII letter is guaranteed to be present in the result;
    letters that never occur in `text` get a count of 0. Symbols other than
    lowercase letters are counted as well.

    :param text: iterable of symbols, normally the ciphertext string.
    :return: Counter mapping each symbol to its number of occurrences.
    """
    freq = Counter(text)
    # Adding zero counts inserts the missing letters without changing the
    # counts (or the insertion order) of the symbols seen in `text`.
    freq.update(dict.fromkeys(ascii_lowercase, 0))
    return freq


def derive_alphabet_freq(freq: Counter) -> OrderedDict:
    """Derive a substitution alphabet from symbol frequencies.

    The most frequent symbol of `freq` is paired with the most frequent
    English letter, the second most frequent with the second, and so on.
    Only as many symbols as there are entries in EN_LETTER_FREQ are used.

    :param freq: symbol counts as returned by `get_frequency`.
    :return: OrderedDict mapping plaintext letter -> ciphertext symbol,
             ordered from most to least frequent.
    """
    most_freq = [symbol for symbol, _count in freq.most_common()]
    return OrderedDict(zip(EN_LETTER_FREQ, most_freq))


def mono_break(enc_txt: str) -> str:
    """Decrypt `enc_txt` using plain letter-frequency analysis.

    The symbols of `enc_txt` are ranked by frequency and replaced by the
    English letter of the same rank. Symbols that have no counterpart
    (more distinct symbols than English letters) are left unchanged.

    Note that every distinct symbol takes part in the ranking, so callers
    should strip whitespace/punctuation that is not part of the cipher.

    :param enc_txt: the ciphertext.
    :return: the tentative plaintext.
    """
    key = derive_alphabet_freq(get_frequency(enc_txt))
    # `key` maps plain -> cipher; decryption needs the inverse direction.
    table = str.maketrans({cipher: plain for plain, cipher in key.items()})
    return enc_txt.translate(table)


# --- Pattern search -------------------------------------------------------

def next_char_anchor(text: str, char) -> Iterator[Tuple[int, str]]:
    """Yield an `(index, char)` tuple for every occurrence of `char`.

    Occurrences are reported in ascending index order. To start searching
    at a later position, pass a slice of the text (indices are then
    relative to that slice).

    :param text: the text to search.
    :param char: the character (or substring) to look for.
    """
    index = text.find(char)
    while index != -1:
        yield (index, char)
        index = text.find(char, index + 1)
    # Only observable through StopIteration.value / `yield from`.
    return (index, char)


def match_word(text: str, word: str, pos_iter):
    """
    Align with anchor and check hypothesis.
    First hypothesis is the frequency analysis.
    Align `word' with `text' for each anchor.
    For remaining anchors check the occurrence of pattern
    and match with word. If it is true more than once, save it as a
    new hypothesis.

    Not implemented yet; currently returns None.
    """
    pass


def next_match(text: str, word: str, alphabet: dict,
               pos_iter: Iterator[Tuple[int, str]]):
    """Placeholder for the next-match search.

    Not implemented yet; currently returns None.
    """
    pass


class Breaker():
    """Breaks a monoalphabetic substitution with the help of a word list.

    Attributes:
        alph: OrderedDict plaintext letter -> ciphertext symbol, derived
              from the letter frequencies of the ciphertext.
        word_file: path of the word list file used by `get_key`.
    """

    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def __init__(self, ciphertext, word_file):
        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
        self.word_file = word_file

    @staticmethod
    def _find_word(lines, char_list) -> Optional[Tuple[str, list]]:
        """Find the first word in `lines` containing all of `char_list`.

        :param lines: iterable of lines, one word per line (e.g. an open
                      word list file); surrounding whitespace is ignored.
        :param char_list: the characters the word has to contain.
        :return: None if no word matches, or TUPLE(word, pos) where `pos'
                 is a LIST with the first position of each character.
        """
        for line in lines:
            word = line.strip()
            if not word:
                continue
            pos = [word.find(char) for char in char_list]
            if -1 not in pos:
                return word, pos
        return None

    def get_key(self):
        """Try to derive the key.

        Looks up a word from the word list file (common.txt) containing
        the first letter of the frequency alphabet. The refinement of the
        alphabet with that word is not implemented yet, so this currently
        always returns None.

        :raises OSError: if the word list file cannot be opened.
        """
        most_freq = next(iter(self.alph))  # first letter of the alphabet
        with open(self.word_file, 'r', encoding="utf-8") as f:
            match = self._find_word(f, most_freq)
        if match is None:
            return None
        word_having_char, pos = match  # unpack
        # TODO: align `word_having_char` with the ciphertext (see
        # match_word / next_match) and refine self.alph.
        return None


text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"


freq = get_frequency(text)
alph = derive_alphabet_freq(freq)

# Print only when run as a script so that importing the module stays silent.
if __name__ == "__main__":
    print(alph.values())