fügt diverse Bugfixes hinzu
This commit is contained in:
@@ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file.
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
#from string import ascii_lowercase
|
||||
|
||||
|
||||
import pdb
|
||||
|
||||
class Breaker():
|
||||
|
||||
|
||||
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
||||
|
||||
@staticmethod
|
||||
def read_word_file(word_file):
|
||||
# excuse me
|
||||
words = []
|
||||
with open(word_file, 'r') as wf:
|
||||
for line in wf:
|
||||
word = line[:-1] # remove trailing newline
|
||||
word = word.lower()
|
||||
|
||||
if word == "" or not word.isalpha():
|
||||
continue
|
||||
|
||||
words.append(word)
|
||||
return words
|
||||
|
||||
def __init__(self, ciphertext, word_file):
    """Set up a breaker for ``ciphertext`` using the words in ``word_file``.

    The key is seeded with a single guess: the first entry of
    ``Breaker.EN_LETTER_FREQ`` is mapped to the most common symbol of the
    ciphertext.

    Args:
        ciphertext: The encrypted text to analyse.
        word_file: Path of the word list, loaded through
            ``Breaker.read_word_file``.

    Raises:
        IndexError: If ``ciphertext`` is empty (there is no most common
            symbol to seed the key with).
        OSError: If the word file cannot be opened.
    """
    most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

    self.text = ciphertext
    self.key = {Breaker.EN_LETTER_FREQ[0]: most_freq_cipher}
    # The word list is loaded once through the shared helper; the former
    # hand-rolled read was immediately overwritten and only cost a second
    # pass over the file.
    self.words = Breaker.read_word_file(word_file)
|
||||
|
||||
def choose_word(self):
|
||||
known_chars = self.key.keys()
|
||||
for i in range(len(self.words)):
|
||||
word = self.words[i]
|
||||
|
||||
if len(word) == 0: continue
|
||||
if len(word) == 0:
|
||||
self.words.pop(i)
|
||||
continue
|
||||
|
||||
# count known chars in word
|
||||
n = 0
|
||||
@@ -42,11 +53,15 @@ class Breaker():
|
||||
if char in known_chars:
|
||||
n +=1
|
||||
|
||||
# skip known words or words with too many unknown
|
||||
if n == len(word) or n / len(word) < 0.3:
|
||||
# remove known words
|
||||
if n == len(word):
|
||||
self.words.pop(i)
|
||||
continue
|
||||
|
||||
return self.words[i]
|
||||
# skip words with too many unknown chars
|
||||
if (n / len(word) < 0.3): continue
|
||||
|
||||
return self.words.pop(i)
|
||||
|
||||
def translate_and_regex(self, word: str):
|
||||
regex = ""
|
||||
@@ -58,12 +73,16 @@ class Breaker():
|
||||
return regex
|
||||
|
||||
def match_ciphertext(self, regex):
    """Return the most frequent ciphertext substring matching ``regex``.

    Args:
        regex: Pattern string searched for in ``self.text``.

    Returns:
        The match that occurs most often in ``self.text``, or ``""`` when
        the pattern matches nothing.
    """
    # findall must be called on the compiled pattern: the module-level
    # re.findall requires the pattern as its first argument.
    pattern = re.compile(regex)
    count = Counter(pattern.findall(self.text))

    if not count:
        return ""
    return count.most_common(1)[0][0]
|
||||
|
||||
def extract_unknown(self, plain, regex, cipher):
|
||||
assert len(plain) == len(regex) == len(cipher)
|
||||
assert "." in regex
|
||||
|
||||
for i in range(len(regex)):
|
||||
if regex[i] != ".": continue
|
||||
@@ -81,7 +100,11 @@ class Breaker():
|
||||
word = self.choose_word()
|
||||
regex = self.translate_and_regex(word)
|
||||
cipher = self.match_ciphertext(regex)
|
||||
print(word, regex, cipher, self.extract_unknown())
|
||||
|
||||
if cipher == "": continue
|
||||
|
||||
print(word, regex, cipher,
|
||||
self.extract_unknown(word, regex, cipher))
|
||||
|
||||
return self.key_to_str()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user