Räumt break_mono.py auf

This commit is contained in:
2020-11-24 13:28:00 +01:00
parent 5355e25ba3
commit cde1cadba1

View File

@@ -8,12 +8,13 @@ from collections import OrderedDict
from collections import Counter from collections import Counter
from string import ascii_lowercase from string import ascii_lowercase
#from libex01 import read_text
def mono_break(enc_txt: str): class Breaker():
## frequency analysis
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
@staticmethod
def get_frequency(text): def get_frequency(text):
freq = Counter(text) freq = Counter(text)
# Counter with lowercase ascii letters all having a count of 0 # Counter with lowercase ascii letters all having a count of 0
@@ -21,88 +22,41 @@ def mono_break(enc_txt: str):
freq.update(missing) freq.update(missing)
return freq return freq
def derive_alphabet(freq: Counter):
return OrderedDict(zip(list(freq.keys()), EN_LETTER_FREQ))
subs = derive_alphabet(get_frequency(enc_txt)) @staticmethod
dec = "" def derive_alphabet_freq(freq: Counter):
for char in enc_txt: most_freq = [ item[0] for item in freq.most_common() ]
dec += subs[char] #return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
return dec return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
## Frequency analysis
# Lowercase letters in the order used as the reference frequency ranking
# (presumably most to least frequent in English text -- confirm the source
# of this ordering).
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
def get_frequency(text):
    """
    Count how often each character occurs in `text`.

    Every lowercase ASCII letter is guaranteed to be a key of the result;
    letters that do not occur in `text` get a count of 0.  Characters other
    than lowercase ASCII letters are counted as they appear.

    Return a Counter mapping character -> number of occurrences.
    """
    freq = Counter(text)
    # Adding a count of 0 leaves existing counts untouched but makes sure
    # all 26 lowercase letters are present as keys.
    freq.update(dict.fromkeys(ascii_lowercase, 0))
    return freq
def derive_alphabet_freq(freq: Counter):
    """
    Derive a substitution alphabet from character frequencies.

    The characters of `freq`, ordered from most to least common, are paired
    position by position with the letters of EN_LETTER_FREQ.

    Return an OrderedDict mapping each EN_LETTER_FREQ letter to the character
    of `freq` holding the same rank.  Pairing stops at the shorter of the two
    sequences, so at most len(EN_LETTER_FREQ) entries are produced.
    """
    most_freq = [char for char, _count in freq.most_common()]
    return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
## Pattern search
def next_char_anchor(text: str, char):
    """
    Generator yielding every position at which `char` occurs in `text`.

    Each item is a TUPLE(index, char); indices are produced in ascending
    order.  Nothing is yielded when `char` does not occur in `text`.
    To start searching from a later position, pass a slice of the text
    (the yielded indices are then relative to that slice).
    """
    index = text.find(char)
    while index != -1:
        yield (index, char)
        # Continue right after the previous hit so each position is
        # reported exactly once.
        index = text.find(char, index + 1)
    # Generator return value (carried by StopIteration); `index` is always
    # -1 here.  Plain `for` loops ignore it.
    return (index, char)
def match_word(text: str, word: str, pos_iter): ## pattern matching
""" @staticmethod
Align with anchor and check hypothesis. def get_word_containing(word_file, char_list: list):
First hypothesis is the frequency analysis. """
Align `word' with `text' for each anchor. Find word from a word list file (common.txt) containing the chars
For remaining anchors check the occurrence of pattern in `char_list'.
and match with word. If it is true more than once, save it as a Return None if no word matches or
new hypothesis. TUPLE(word, pos) where `pos' is a LIST of matching positions.
""" """
pass with open(word_file, 'r') as f:
for word in f:
pos = []
for char in char_list:
pos += word.find(char)
if -1 not in pos:
return word, pos
return None
def next_match(text: str, word: str, alphabet: dict, pos_iter):
    """
    Placeholder, not implemented yet: does nothing and returns None.

    `pos_iter` was annotated with the undefined name `generator`, which made
    the definition itself fail with a NameError; the annotation is dropped.
    NOTE(review): `pos_iter` is presumably a generator such as the one
    produced by next_char_anchor -- confirm once this is implemented.
    """
    pass
class Breaker():
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
def __init__(self, ciphertext, word_file): def __init__(self, ciphertext, word_file):
self.alph = derive_alphabet_freq(get_frequency(ciphertext)) self.alph = derive_alphabet_freq(get_frequency(ciphertext))
self.word_file = None # TODO self.word_file = None # TODO
def get_key(self): def get_key(self):
def get_word_containing(char_list: list):
    """
    Find the first word in the word list file (`self.word_file`, one word
    per line, e.g. common.txt) that contains every char in `char_list'.

    Return None if no word matches, or
    TUPLE(word, pos) where `pos' is a LIST holding, for each char of
    `char_list' in order, the position of its first occurrence in the word.
    """
    with open(self.word_file, 'r') as f:
        for line in f:
            # Drop the line terminator so the returned word is the bare word.
            word = line.rstrip("\n")
            # Collect positions with a list; the previous `pos += int`
            # raised TypeError on the first char.
            pos = [word.find(char) for char in char_list]
            if -1 not in pos:
                return word, pos
    return None
most_freq = next(iter(self.alph)) # most frequent char most_freq = next(iter(self.alph)) # most frequent char
word_having_char, pos = get_word_containing(most_freq) # unpack word_having_char, pos = get_word_containing(self.word_file, most_freq)
pass pass