break_mono.py wird komplett umgeschrieben

breakmono2
Daniel Tschertkow 5 years ago
parent 3331177041
commit 12cca20112

@ -3,125 +3,86 @@
"""
Python module to derive a key from a monoalphabetically encrypted file.
"""
import itertools as it
import re
from collections import OrderedDict
from collections import Counter
from string import ascii_lowercase
class Breaker():
    """
    Derive the key of a monoalphabetic substitution cipher.

    The key is kept in `self.key` as a dict mapping a plaintext character
    to the ciphertext character that stands for it.  It is seeded with the
    most frequent ciphertext character (assumed to be the most frequent
    English letter) and then extended word by word: a partially known word
    from the word list is turned into a pattern, the pattern is searched
    for in the ciphertext, and the best match reveals the unknown letters.
    """

    # English letters ordered from most to least frequent.
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    ## frequency analysis

    @staticmethod
    def get_frequency(text):
        """
        Count every character of `text`.

        All lowercase ASCII letters are guaranteed to be present in the
        result; letters that do not occur in `text` have a count of 0.
        """
        freq = Counter(text)
        for letter in ascii_lowercase:
            freq.setdefault(letter, 0)
        return freq

    @staticmethod
    def derive_alphabet_freq(freq: Counter):
        """
        Pair the English letters (most frequent first) with the characters
        of `freq` (most frequent first).

        Return an OrderedDict mapping English letter -> character of `freq`.
        """
        ranked = [symbol for symbol, _ in freq.most_common()]
        return OrderedDict(zip(Breaker.EN_LETTER_FREQ, ranked))

    ## pattern matching

    @staticmethod
    def get_word_containing(word_file, char_list: list):
        """
        Find the first word in a word list file (one word per line) that
        contains every char in `char_list'.

        Return TUPLE(word, pos) where `pos' is a LIST holding, for each
        char of `char_list', the index of its first occurrence in `word',
        or TUPLE(None, None) if no word matches.
        """
        with open(word_file, 'r') as f:
            for line in f:
                # Strip only the newline: slicing off the last character
                # would damage a final line that has no trailing newline.
                word = line.rstrip("\n")
                pos = [word.find(char) for char in char_list]
                if -1 not in pos:
                    return word, pos
        return None, None

    @staticmethod
    def positions(text: str, sub):
        """
        Yield every index at which `sub' occurs in `text' (overlapping
        occurrences included).
        """
        index = text.find(sub)
        while index != -1:
            yield index
            index = text.find(sub, index + 1)

    @staticmethod
    def match_ciphertext_by_position(text: str, word_pos: tuple, char: str):
        """
        Position based matching.

        `word_pos' is TUPLE(word, pos) as returned by get_word_containing;
        `char' is the ciphertext character expected at offset pos[0] of the
        word.  Every occurrence of `char' in `text' is aligned with that
        offset and the snippet of len(word) characters around it is
        counted.

        Return the most frequent snippet, or None if there is none.

        NOTE: inside the class body this helper used to share the name
        `match_ciphertext' with the pattern based method defined further
        down, which replaced it.  It is kept under a distinct name.
        """
        word, offsets = word_pos
        if word is None or not offsets:
            return None
        offset = offsets[0]
        width = len(word)
        snip_count = Counter()
        for pos in Breaker.positions(text, char):
            start = pos - offset
            # A window sticking out of the text cannot hold the word; a
            # negative start would also silently slice from the end.
            if start < 0 or start + width > len(text):
                continue
            snippet = text[start:start + width]
            if snippet not in snip_count:
                snip_count[snippet] = text.count(snippet)
        if not snip_count:
            return None
        return snip_count.most_common(1)[0][0]

    @staticmethod
    def choose_known_letters(key_alphabet):
        """
        Yield lists of letters taken from the keys of `key_alphabet'.

        With fewer than three keys the single list of all keys is yielded,
        otherwise every ordered triple of distinct keys.
        """
        letters = list(key_alphabet.keys())
        if len(letters) < 3:
            yield letters
        else:
            for triple in it.permutations(letters, 3):
                yield list(triple)

    def __init__(self, ciphertext, word_file):
        """
        `ciphertext' is the encrypted text, `word_file' the path of a word
        list with one word per line.

        Raises ValueError if `ciphertext' is empty.
        """
        if not ciphertext:
            raise ValueError("ciphertext must not be empty")
        self.ciphertext = ciphertext
        self.text = ciphertext  # second name used by the pattern matching
        self.word_file = word_file
        self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
        with open(word_file, 'r') as wf:
            self.words = [line.rstrip("\n") for line in wf]
        # Seed the key: most frequent English letter -> most frequent
        # character of the ciphertext.
        most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
        self.key = {Breaker.EN_LETTER_FREQ[0]: most_freq_cipher}

    def choose_word(self, skip=()):
        """
        Return the first usable word of the word list, or None.

        A word is usable if it is not empty, not in `skip', not completely
        known yet, and at least 30% of its characters are already in the
        key.
        """
        for word in self.words:
            if not word or word in skip:
                continue
            # count known chars in word
            known = sum(1 for char in word if char in self.key)
            # skip known words or words with too many unknown
            if known == len(word) or known / len(word) < 0.3:
                continue
            return word
        return None

    def translate_and_regex(self, word: str):
        """
        Translate `word' into a pattern of the same length: known
        characters are replaced by their ciphertext character, unknown
        ones by ".".
        """
        return "".join(self.key.get(char, ".") for char in word)

    @staticmethod
    def _escape_pattern(pattern):
        """Escape every character of `pattern' except the "." wildcard."""
        return "".join(
            char if char == "." else re.escape(char) for char in pattern
        )

    def match_ciphertext(self, regex):
        """
        Return the most frequent match of `regex' in the ciphertext, or
        None if it does not match at all.
        """
        matches = re.findall(regex, self.text)
        if not matches:
            return None
        return Counter(matches).most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        """
        Extend the key from a matched word.

        For every position at which `regex' holds ".", plain[i] is mapped
        to cipher[i].  Return the key.

        Raises ValueError if the three arguments differ in length.
        """
        if not len(plain) == len(regex) == len(cipher):
            raise ValueError("plain, regex and cipher must have equal length")
        for i, marker in enumerate(regex):
            if marker != ".":
                continue
            self.key[plain[i]] = cipher[i]
        return self.key

    def key_to_str(self):
        """Return the string form of the key's `keys()' view."""
        # NOTE(review): this shows only the plaintext side of the key;
        # confirm the intended output format.
        return str(self.key.keys())

    def get_key(self):
        """
        Extend the key until it has 26 entries or no usable word is left,
        printing each step, and return key_to_str().
        """
        exhausted = set()  # words whose pattern does not occur in the text
        while len(self.key) < 26:
            word = self.choose_word(skip=exhausted)
            if word is None:
                break
            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(Breaker._escape_pattern(regex))
            if cipher is None:
                # choose_word would return this word forever otherwise
                exhausted.add(word)
                continue
            print(word, regex, cipher,
                  self.extract_unknown(word, regex, cipher))
        return self.key_to_str()
## end Breaker

Loading…
Cancel
Save