fügt diverse Bugfixes hinzu

breakmono2
Daniel Tschertkow 5 years ago
parent 12cca20112
commit afb6faabbc

@@ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file.
import re import re
from collections import Counter from collections import Counter
#from string import ascii_lowercase
import pdb
class Breaker(): class Breaker():
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
def __init__(self, ciphertext, word_file): @staticmethod
def read_word_file(word_file):
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] # excuse me
# excuse me:
words = [] words = []
with open(word_file, 'r') as wf: with open(word_file, 'r') as wf:
for line in wf: for line in wf:
words.append(line[:-1]) # remove trailing newline and append word = line[:-1] # remove trailing newline
word = word.lower()
if word == "" or not word.isalpha():
continue
words.append(word)
return words
def __init__(self, ciphertext, word_file):
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
self.text = ciphertext self.text = ciphertext
self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher } self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
self.words = words self.words = Breaker.read_word_file(word_file)
def choose_word(self): def choose_word(self):
known_chars = self.key.keys() known_chars = self.key.keys()
for i in range(len(self.words)): for i in range(len(self.words)):
word = self.words[i] word = self.words[i]
if len(word) == 0: continue if len(word) == 0:
self.words.pop(i)
continue
# count known chars in word # count known chars in word
n = 0 n = 0
@@ -42,11 +53,15 @@ class Breaker():
if char in known_chars: if char in known_chars:
n +=1 n +=1
# skip known words or words with too many unknown # remove known words
if n == len(word) or n / len(word) < 0.3: if n == len(word):
self.words.pop(i)
continue continue
return self.words[i] # skip words with too many unknown chars
if (n / len(word) < 0.3): continue
return self.words.pop(i)
def translate_and_regex(self, word: str): def translate_and_regex(self, word: str):
regex = "" regex = ""
@@ -58,12 +73,16 @@ class Breaker():
return regex return regex
def match_ciphertext(self, regex): def match_ciphertext(self, regex):
rx = re.compile(regex) regexc = re.compile(regex)
count = Counter(re.findall(self.text)) count = Counter(regexc.findall(self.text))
return count.most_common(1)[0][0] if len(count) == 0:
return ""
else:
return count.most_common(1)[0][0]
def extract_unknown(self, plain, regex, cipher): def extract_unknown(self, plain, regex, cipher):
assert len(plain) == len(regex) == len(cipher) assert len(plain) == len(regex) == len(cipher)
assert "." in regex
for i in range(len(regex)): for i in range(len(regex)):
if regex[i] != ".": continue if regex[i] != ".": continue
@ -81,7 +100,11 @@ class Breaker():
word = self.choose_word() word = self.choose_word()
regex = self.translate_and_regex(word) regex = self.translate_and_regex(word)
cipher = self.match_ciphertext(regex) cipher = self.match_ciphertext(regex)
print(word, regex, cipher, self.extract_unknown())
if cipher == "": continue
print(word, regex, cipher,
self.extract_unknown(word, regex, cipher))
return self.key_to_str() return self.key_to_str()

Loading…
Cancel
Save