break_mono.py wird komplett umgeschrieben

breakmono2
Daniel Tschertkow 5 years ago
parent 3331177041
commit 12cca20112

@ -3,125 +3,86 @@
""" """
Python module to derive a key from a monoalphabetically encrypted file. Python module to derive a key from a monoalphabetically encrypted file.
""" """
import itertools as it
import re import re
from collections import OrderedDict
from collections import Counter from collections import Counter
from string import ascii_lowercase
class Breaker(): class Breaker():
## frequency analysis
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
@staticmethod EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
def get_frequency(text):
    """
    Count how often each character occurs in `text'.

    Every lowercase ASCII letter is a key of the returned Counter, with
    a count of 0 when it does not occur in `text'. Any other character
    is a key only if it occurs in `text'.
    """
    counts = Counter(text)
    # Adding 0 leaves existing counts untouched but creates the keys
    # that are still missing.
    for letter in ascii_lowercase:
        counts[letter] += 0
    return counts
def __init__(self, ciphertext, word_file):
@staticmethod most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
def derive_alphabet_freq(freq: Counter):
    """
    Pair English letters with the symbols counted in `freq', by rank.

    The n-th entry of Breaker.EN_LETTER_FREQ is mapped to the n-th most
    common key of `freq'. zip() stops at the shorter of the two
    sequences, so surplus entries on either side are dropped.
    Return an OrderedDict keyed by the English letters.
    """
    by_rank = (symbol for symbol, _ in freq.most_common())
    return OrderedDict(zip(Breaker.EN_LETTER_FREQ, by_rank))
# excuse me:
words = []
with open(word_file, 'r') as wf:
for line in wf:
words.append(line[:-1]) # remove trailing newline and append
## pattern matching self.text = ciphertext
@staticmethod self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
def get_word_containing(word_file, char_list: list): self.words = words
"""
Find word from a word list file (common.txt) containing the chars
in `char_list'.
Return (None, None) if no word matches, or
TUPLE(word, pos) where `pos' is a LIST of matching positions.
"""
with open(word_file, 'r') as f:
for line in f:
word = line[:-1]
pos = []
for char in char_list:
pos.append(word.find(char))
if -1 not in pos:
return word, pos
return None, None
@staticmethod
def positions(text: str, sub):
    """
    Yield the index of every occurrence of `sub' in `text', ascending.

    The search resumes one character after the previous hit, so
    overlapping occurrences are reported as well.
    """
    index = -1
    while True:
        index = text.find(sub, index + 1)
        if index == -1:
            # No further occurrence: end the generator.
            return index
        yield index
@staticmethod
def match_ciphertext(text: str, word_pos: tuple, char: tuple):
"""
asdf
"""
word, wposl = word_pos
wpos = wposl[0]
wlen = len(word)
snip_count = Counter() def choose_word(self):
for pos in Breaker.positions(text, char): known_chars = self.key.keys()
word_begin = pos - wpos for i in range(len(self.words)):
snippet = text[word_begin : word_begin + wlen] word = self.words[i]
if snippet not in snip_count.elements(): if len(word) == 0: continue
snip_count[snippet] = text.count(snippet)
return snip_count.most_common(1)[0][0] # count known chars in word
n = 0
for char in word:
if char in known_chars:
n +=1
@staticmethod # skip known words or words with too many unknown
def choose_known_letters(key_alphabet): if n == len(word) or n / len(word) < 0.3:
letters = list(key_alphabet.keys()) continue
if len(key_alphabet) < 3:
yield letters
else:
for i in letters:
for j in letters:
for k in letters:
if k == j or k == i or j == i: continue
yield [i, j, k]
return None
def __init__(self, ciphertext, word_file): return self.words[i]
self.ciphertext = ciphertext
self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
self.word_file = word_file
def translate_and_regex(self, word: str):
    """
    Build a search pattern for `word' from the current key.

    Each character of `word' that is a key of self.key is replaced by
    the value it maps to; every other character becomes ".". The result
    therefore has one pattern element per character of `word'.
    """
    known = self.key
    return "".join(known[char] if char in known else "." for char in word)
def match_ciphertext(self, regex):
    """
    Return the substring of self.text that matches `regex' most often.

    `regex' is a pattern string as accepted by re.compile().
    Raise ValueError if `regex' does not match anywhere in self.text.
    """
    # findall must be called on the compiled pattern; the module-level
    # re.findall(self.text) lacks its pattern argument and raises
    # TypeError on every call.
    matches = Counter(re.compile(regex).findall(self.text))
    if not matches:
        raise ValueError("no match for {!r} in ciphertext".format(regex))
    return matches.most_common(1)[0][0]
key_alphabet = OrderedDict() def extract_unknown(self, plain, regex, cipher):
assert len(plain) == len(regex) == len(cipher)
# most frequent char in English and corresponding most common char in text for i in range(len(regex)):
most_freq = self.alph[Breaker.EN_LETTER_FREQ] if regex[i] != ".": continue
self.key.update({ plain[i] : cipher[i] })
key_alphabet[Breaker.EN_LETTER_FREQ[0]] = most_freq return self.key
while len(key_alphabet) < 26: def key_to_str(self):
return str(self.key.keys())
word_pos = Breaker.get_word_containing( def get_key(self):
self.word_file,
next(Breaker.choose_known_letters(key_alphabet))
)
pass while len(self.key.keys()) < 26:
most_common = Breaker.match_ciphertext( word = self.choose_word()
self.ciphertext, regex = self.translate_and_regex(word)
word_pos, cipher = self.match_ciphertext(regex)
most_freq print(word, regex, cipher, self.extract_unknown())
)
pass return self.key_to_str()
## end Breaker ## end Breaker

Loading…
Cancel
Save