break_mono.py wird komplett umgeschrieben

breakmono2
Daniel Tschertkow 5 years ago
parent 3331177041
commit 12cca20112

@ -3,125 +3,86 @@
""" """
Python module to derive a key from a monoalphabetically encrypted file.
""" """
import itertools as it
import re import re
from collections import OrderedDict
from collections import Counter from collections import Counter
from string import ascii_lowercase
class Breaker(): class Breaker():
## frequency analysis
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
@staticmethod EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
def get_frequency(text):
    """
    Count every character of `text`.

    All 26 lowercase ASCII letters are guaranteed to be present in the
    result: letters that never occur in `text` are added with a count of 0.
    Characters other than lowercase letters are counted as they appear.

    Returns a `Counter` mapping character -> number of occurrences.
    """
    freq = Counter(text)
    # Adding zero never changes an existing count; it only creates the
    # entries for letters that are missing from the text.
    freq.update(dict.fromkeys(ascii_lowercase, 0))
    return freq
@staticmethod
def derive_alphabet_freq(freq: Counter):
    """
    Pair English letters with ciphertext symbols of the same frequency rank.

    `freq` is a character counter of the ciphertext. Its symbols, ordered
    from most to least common, are zipped with `Breaker.EN_LETTER_FREQ`.
    `zip` stops at the shorter input, so the result never has more entries
    than `EN_LETTER_FREQ`.

    Returns an `OrderedDict` mapping English letter -> ciphertext symbol.
    """
    ranked_symbols = (symbol for symbol, _count in freq.most_common())
    return OrderedDict(zip(Breaker.EN_LETTER_FREQ, ranked_symbols))
## pattern matching
@staticmethod
def get_word_containing(word_file, char_list: list):
"""
Find word from a word list file (common.txt) containing the chars
in `char_list'.
Return None it no word matches or
TUPLE(word, pos) where `pos' is a LIST of matching positions.
"""
with open(word_file, 'r') as f:
for line in f:
word = line[:-1]
pos = []
for char in char_list:
pos.append(word.find(char))
if -1 not in pos:
return word, pos
return None, None
@staticmethod
def positions(text: str, sub):
index = text.find(sub)
while index != -1:
yield index
index = text.find(sub, index + 1)
return index
@staticmethod
def match_ciphertext(text: str, word_pos: tuple, char: tuple):
    """
    Pick the ciphertext snippet most likely to encrypt a given word.

    `word_pos` is a `(word, positions)` tuple; only `positions[0]` is
    used, as the offset inside `word` of the letter whose ciphertext
    symbol is `char`.
    NOTE(review): `char` is passed straight to `str.find`, so despite the
    `tuple` annotation it appears to be a string - confirm with callers.

    For every occurrence of `char` in `text`, the window of `len(word)`
    characters that places `char` at that offset is a candidate. Each
    distinct candidate is scored with `text.count`.

    Returns the candidate with the highest count.
    Raises `IndexError` if there is no candidate at all.
    """
    word, wposl = word_pos
    wpos = wposl[0]
    wlen = len(word)

    snip_count = Counter()
    for pos in Breaker.positions(text, char):
        word_begin = pos - wpos
        # A negative start would make the slice wrap around to the end of
        # the text, and a window running past the end would be shorter
        # than the word. Neither can be an encryption of `word`.
        if word_begin < 0 or word_begin + wlen > len(text):
            continue
        snippet = text[word_begin:word_begin + wlen]
        if snippet not in snip_count:
            snip_count[snippet] = text.count(snippet)

    return snip_count.most_common(1)[0][0]
@staticmethod
def choose_known_letters(key_alphabet):
letters = list(key_alphabet.keys())
if len(key_alphabet) < 3:
yield letters
else:
for i in letters:
for j in letters:
for k in letters:
if k == j or k == i or j == i: continue
yield [i, j, k]
return None
def __init__(self, ciphertext, word_file):
    """
    Set up a breaker for `ciphertext`.

    `word_file` is the path of a text file with one word per line; all
    words are loaded into `self.words`.

    The key (`self.key`, plaintext letter -> ciphertext symbol) starts
    with a single guess: the first entry of `Breaker.EN_LETTER_FREQ` is
    mapped to the most common symbol of the ciphertext.

    Raises `IndexError` if `ciphertext` is empty.
    """
    most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

    with open(word_file, 'r') as wf:
        # Strip only the newline. `line[:-1]` would cut the last letter
        # off a final line that has no trailing newline.
        words = [line.rstrip("\n") for line in wf]

    self.text = ciphertext
    self.key = {Breaker.EN_LETTER_FREQ[0]: most_freq_cipher}
    self.words = words
def choose_word(self):
    """
    Pick the next word from `self.words` to match against the ciphertext.

    A word qualifies when it is non-empty, at least one of its characters
    is still missing from `self.key`, and at least 30% of its characters
    are already in `self.key`.

    Returns the first qualifying word, or `None` if there is none.
    """
    known_chars = self.key.keys()

    for word in self.words:
        if not word:
            continue

        known = sum(1 for char in word if char in known_chars)

        # Skip fully known words and words with too many unknown letters.
        if known == len(word) or known / len(word) < 0.3:
            continue

        return word

    return None
def translate_and_regex(self, word: str):
    """
    Turn a plaintext word into a search pattern for the ciphertext.

    Each character of `word` found in `self.key` is replaced by its
    ciphertext symbol; every other character becomes the wildcard ".".
    The result has exactly one pattern character per input character.

    NOTE(review): ciphertext symbols are inserted verbatim, not escaped,
    so a symbol that is a regex metacharacter would alter the pattern.
    """
    return "".join(self.key.get(char, ".") for char in word)
def match_ciphertext(self, regex):
    """
    Return the ciphertext snippet that matches `regex` most often.

    All non-overlapping matches of `regex` in `self.text` are counted and
    the most frequent one is returned.

    Raises `IndexError` if `regex` does not match anywhere in the text.
    """
    rx = re.compile(regex)
    # Search with the compiled pattern. The module-level `re.findall`
    # needs both a pattern and a string, so calling it with the text
    # alone raises TypeError.
    count = Counter(rx.findall(self.text))
    return count.most_common(1)[0][0]
def extract_unknown(self, plain, regex, cipher):
    """
    Learn new key entries from a matched word.

    `plain` is the plaintext word, `regex` the pattern built from it and
    `cipher` the ciphertext snippet the pattern matched; all three must
    have the same length. For every position where `regex` holds the
    wildcard ".", the mapping `plain[i] -> cipher[i]` is stored in
    `self.key`.

    Returns `self.key` (the same dict object, updated in place).
    """
    assert len(plain) == len(regex) == len(cipher)

    for plain_char, pattern_char, cipher_char in zip(plain, regex, cipher):
        if pattern_char != ".":
            continue
        self.key[plain_char] = cipher_char

    return self.key
def key_to_str(self):
    """
    Return the string form of the key's dict-keys view.

    The result looks like "dict_keys(['e', 't'])" and lists the plaintext
    letters that are currently in `self.key`.

    NOTE(review): the ciphertext symbols (the dict values) are not part
    of the output - confirm that this is the intended key representation.
    """
    return str(self.key.keys())
def get_key(self):
    """
    Grow `self.key` word by word until it has 26 entries.

    Each round picks a candidate word with `choose_word`, turns it into a
    pattern with `translate_and_regex`, finds the best-matching ciphertext
    snippet with `match_ciphertext` and records the newly learned letters
    with `extract_unknown`. Progress is printed after every round.

    The loop also stops when `choose_word` has no candidate left, so the
    returned key may be incomplete.

    Returns the result of `key_to_str()`.
    """
    while len(self.key.keys()) < 26:
        word = self.choose_word()
        if word is None:
            # No usable word left; carrying on would fail on `None`.
            break

        regex = self.translate_and_regex(word)
        cipher = self.match_ciphertext(regex)
        # extract_unknown needs the word, its pattern and the matched
        # snippet to know which letters were learned.
        print(word, regex, cipher, self.extract_unknown(word, regex, cipher))

    return self.key_to_str()
## end Breaker ## end Breaker

Loading…
Cancel
Save