diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py index 58f42c0..a362a8f 100644 --- a/src/mono/break_mono.py +++ b/src/mono/break_mono.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + """ Python module to derive a key from an monoalphabetically encrypted file. """ @@ -27,7 +29,7 @@ class Breaker(): def derive_alphabet_freq(freq: Counter): most_freq = [ item[0] for item in freq.most_common() ] #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) - return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) + return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq)) ## pattern matching @@ -40,32 +42,65 @@ class Breaker(): TUPLE(word, pos) where `pos' is a LIST of matching positions. """ with open(word_file, 'r') as f: - for word in f: + for line in f: + word = line[:-1] pos = [] for char in char_list: - pos += word.find(char) + pos.append(word.find(char)) if -1 not in pos: return word, pos - return None + return None, None + + @staticmethod + def positions(text: str, sub): + index = text.find(sub) + while index != -1: + yield index + index = text.find(sub, index + 1) + return index + @staticmethod + def match_ciphertext(text: str, word_pos: tuple, char: tuple): + word, wposl = word_pos + wpos = wposl[0] + wlen = len(word) + + snip_count = Counter() + for pos in Breaker.positions(text, char): + word_begin = pos - wpos + snippet = text[word_begin : word_begin + wlen] + + if snippet not in snip_count.elements(): + snip_count[snippet] = text.count(snippet) + + return snip_count.most_common(1)[0][0] + def __init__(self, ciphertext, word_file): - self.alph = derive_alphabet_freq(get_frequency(ciphertext)) - self.word_file = None # TODO + self.ciphertext = ciphertext + self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext)) + self.word_file = word_file def get_key(self): - most_freq = next(iter(self.alph)) # most frequent char - word_having_char, pos = get_word_containing(self.word_file, most_freq) - - pass -text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt" + # most frequent char in English and corresponding most common char in text + #most_freq = self.alph.popitem(last=False) + most_freq = next(iter(self.alph)) + word_pos = Breaker.get_word_containing( + self.word_file, + #most_freq[0] + most_freq + ) + most_common = Breaker.match_ciphertext( + self.ciphertext, + word_pos, + most_freq + ) + print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common) -freq = get_frequency(text) -alph = derive_alphabet(freq) -print(alph.values()) + ## end Breaker