fügt match_ciphertext in break_mono hinzu

2020-11-24 19:18:14 +01:00
parent cde1cadba1
commit e3f854fa0c
1 changed files with 50 additions and 15 deletions
--- a/src/mono/break_mono.py
+++ b/src/mono/break_mono.py
@@ -1,3 +1,5 @@
 #!/usr/bin/env python
 """
 Python module to derive a key from a monoalphabetically encrypted file.
 """
@@ -27,7 +29,7 @@ class Breaker():
    def derive_alphabet_freq(freq: Counter):
        """Pair the entries of EN_LETTER_FREQ with the symbols counted in `freq'.

        The symbols of `freq' are taken in Counter.most_common() order
        (highest count first) and zipped position by position with the
        reference sequence; zip stops at the shorter of the two.

        Returns an OrderedDict whose keys come from the reference sequence
        and whose values are the corresponding symbols of `freq'.
        """
        # NOTE(review): EN_LETTER_FREQ is defined outside this view; presumably
        # the English letters ordered by descending frequency -- confirm.
        most_freq = [ item[0] for item in freq.most_common() ]
        #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
-        return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
+        return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq))
    ## pattern matching
@@ -40,32 +42,65 @@ class Breaker():
        TUPLE(word, pos) where `pos' is a LIST of matching positions.
        """
        with open(word_file, 'r') as f:
-            for word in f:
+            for line in f:
                word = line[:-1]
                pos = []
                for char in char_list:
-                    pos += word.find(char)
+                    pos.append(word.find(char))
                if -1 not in pos:
                    return word, pos
-        return None
+        return None, None
    @staticmethod
    def positions(text: str, sub):
        """
        Yield every index at which `sub' starts in `text', in ascending order.

        The search resumes one character after the previous hit, so
        overlapping matches are reported as well.  When no further match
        exists the generator finishes, carrying -1 (the result of the last,
        unsuccessful find) as its return value.
        """
        start = 0
        while True:
            hit = text.find(sub, start)
            if hit == -1:
                # no more occurrences: end the generator
                return hit
            yield hit
            start = hit + 1
    @staticmethod
    def match_ciphertext(text: str, word_pos: tuple, char: str):
        """
        Find the snippet of `text' that most likely corresponds to a word.

        text     -- the ciphertext to search.
        word_pos -- TUPLE(word, pos) as returned by get_word_containing;
                    only the first entry of `pos' is used, i.e. the index
                    inside `word' at which `char' is expected.
        char     -- the substring searched for in `text' (handed to str.find,
                    so it has to be a string).

        For every occurrence of `char' in `text' the window of len(word)
        characters that would put `char' at that index is cut out.  Each
        distinct window is rated by how often it occurs in `text'
        (str.count, i.e. non-overlapping occurrences).

        Returns the window with the highest count, or None if `word_pos'
        holds no word/positions or no window fits completely into `text'.
        """
        word, wposl = word_pos
        if not word or not wposl:
            # get_word_containing found nothing: there is no pattern to match
            return None
        wpos = wposl[0]
        wlen = len(word)
        text_len = len(text)
        snip_count = Counter()
        for pos in Breaker.positions(text, char):
            word_begin = pos - wpos
            # A negative start would make the slice wrap around from the end
            # of the text, and a window running past the end would be
            # truncated; neither can be a full match for the word.
            if word_begin < 0 or word_begin + wlen > text_len:
                continue
            snippet = text[word_begin:word_begin + wlen]
            # test the Counter's keys directly instead of scanning elements()
            if snippet not in snip_count:
                snip_count[snippet] = text.count(snippet)
        if not snip_count:
            return None
        return snip_count.most_common(1)[0][0]
    def __init__(self, ciphertext, word_file):
        """Set up a breaker for `ciphertext'.

        ciphertext -- text to analyse; kept as self.ciphertext and passed to
                      get_frequency, whose result derive_alphabet_freq turns
                      into self.alph.
        word_file  -- kept as self.word_file; get_word_containing opens it
                      with open(word_file, 'r'), so it has to name a readable
                      text file.
        """
-        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
+        self.ciphertext = ciphertext
-        self.word_file = None  # TODO
+        self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
        self.word_file = word_file
    def get_key(self):
        """Start deriving the key from the first entry of self.alph.

        Takes the first key of self.alph, asks get_word_containing for a word
        from self.word_file that contains it, and lets match_ciphertext pick
        the most common matching snippet of self.ciphertext.  The
        intermediate results are only printed; nothing is returned.
        """
        # NOTE(review): the next three lines look like the previous body of
        # this method whose diff markers got lost; the get_word_containing
        # call is not qualified with Breaker/self -- confirm they are meant
        # to be removed.
        most_freq = next(iter(self.alph))  # most frequent char
        word_having_char, pos = get_word_containing(self.word_file, most_freq)
        pass
-text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"
+        # most frequent char in English and corresponding most common char in text
        # NOTE(review): the keys of self.alph come from EN_LETTER_FREQ (see
        # derive_alphabet_freq), not from the ciphertext -- confirm that
        # searching self.ciphertext for this key, rather than for its mapped
        # value, is intended.
        #most_freq = self.alph.popitem(last=False)
        most_freq = next(iter(self.alph))
        word_pos = Breaker.get_word_containing(
            self.word_file,
            #most_freq[0]
            most_freq
        )
        most_common = Breaker.match_ciphertext(
            self.ciphertext,
            word_pos,
            most_freq
        )
        print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common)
-freq = get_frequency(text)
+    ## end Breaker
 # NOTE(review): the lines removed by this change were the only visible
 # definitions of `text` and `freq`, and `derive_alphabet` is not defined in
 # the visible part of this file -- this leftover script code presumably
 # raises NameError when the module is run or imported; confirm and remove
 # it or move it behind a main guard.
 alph = derive_alphabet(freq)
 print(alph.values())