|
|
|
|
@ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file.
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
from collections import Counter
|
|
|
|
|
#from string import ascii_lowercase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pdb
|
|
|
|
|
|
|
|
|
|
class Breaker():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
|
|
|
|
|
|
|
|
|
def __init__(self, ciphertext, word_file):
|
|
|
|
|
|
|
|
|
|
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
|
|
|
|
|
|
|
|
|
|
# excuse me:
|
|
|
|
|
@staticmethod
|
|
|
|
|
def read_word_file(word_file):
|
|
|
|
|
# excuse me
|
|
|
|
|
words = []
|
|
|
|
|
with open(word_file, 'r') as wf:
|
|
|
|
|
for line in wf:
|
|
|
|
|
words.append(line[:-1]) # remove trailing newline and append
|
|
|
|
|
word = line[:-1] # remove trailing newline
|
|
|
|
|
word = word.lower()
|
|
|
|
|
|
|
|
|
|
if word == "" or not word.isalpha():
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
words.append(word)
|
|
|
|
|
return words
|
|
|
|
|
|
|
|
|
|
def __init__(self, ciphertext, word_file):
    """Set up a breaker for one ciphertext.

    Seeds the key by pairing the first entry of ``EN_LETTER_FREQ``
    with the most frequent character of *ciphertext*, then loads the
    candidate words from *word_file*.

    Args:
        ciphertext: The encrypted text to analyse.
        word_file: Path of the word list handed to ``read_word_file``.

    Raises:
        IndexError: If *ciphertext* is empty (there is no most common
            character to seed the key with).
    """
    # NOTE(review): every character of the text is counted, including
    # whitespace and punctuation if present -- confirm the caller passes
    # letters only, otherwise the seed may not be a letter.
    most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]

    self.text = ciphertext
    # Maps a plaintext letter to the ciphertext character standing for it.
    self.key = {Breaker.EN_LETTER_FREQ[0]: most_freq_cipher}
    # The word list always comes from the file; there is no local `words`
    # variable in this scope to assign from.
    self.words = Breaker.read_word_file(word_file)
|
|
|
|
|
|
|
|
|
|
def choose_word(self):
    """Pick the next dictionary word to match against the ciphertext.

    Walks ``self.words`` in order and, for each word, counts how many of
    its characters are already keys of ``self.key``:

    * empty words and words whose characters are all known are removed
      from ``self.words`` (they cannot reveal anything new);
    * words with fewer than 30% known characters are left in the list
      and passed over;
    * the first word that is neither is removed from the list and
      returned.

    Words after the chosen one are left untouched.

    Returns:
        The chosen word, or ``None`` if no remaining word qualifies.
    """
    known_chars = self.key.keys()

    # Build the surviving list separately instead of popping while
    # iterating by index, which skips the element after every pop and
    # runs past the end of the shortened list.
    kept = []
    chosen = None
    for pos, word in enumerate(self.words):
        if not word:
            continue

        # count known chars in word
        n = sum(1 for char in word if char in known_chars)

        # remove known words
        if n == len(word):
            continue

        # skip words with too many unknown chars
        if n / len(word) < 0.3:
            kept.append(word)
            continue

        chosen = word
        kept.extend(self.words[pos + 1:])
        break

    # Slice assignment keeps the same list object, as the in-place pops did.
    self.words[:] = kept
    return chosen
|
|
|
|
|
|
|
|
|
|
def translate_and_regex(self, word: str):
|
|
|
|
|
regex = ""
|
|
|
|
|
@ -58,12 +73,16 @@ class Breaker():
|
|
|
|
|
return regex
|
|
|
|
|
|
|
|
|
|
def match_ciphertext(self, regex):
    """Return the most frequent match of *regex* in the ciphertext.

    Args:
        regex: Regular-expression pattern searched for in ``self.text``.
            If the pattern contains capture groups, ``findall`` yields
            the group contents rather than the whole match.

    Returns:
        The match that occurs most often, or ``""`` when the pattern
        does not match anywhere (callers test for the empty string).
    """
    regexc = re.compile(regex)
    count = Counter(regexc.findall(self.text))

    # Guard before indexing: most_common(1) is an empty list when
    # nothing matched.
    if not count:
        return ""
    return count.most_common(1)[0][0]
|
|
|
|
|
|
|
|
|
|
def extract_unknown(self, plain, regex, cipher):
|
|
|
|
|
assert len(plain) == len(regex) == len(cipher)
|
|
|
|
|
assert "." in regex
|
|
|
|
|
|
|
|
|
|
for i in range(len(regex)):
|
|
|
|
|
if regex[i] != ".": continue
|
|
|
|
|
@ -81,7 +100,11 @@ class Breaker():
|
|
|
|
|
word = self.choose_word()
|
|
|
|
|
regex = self.translate_and_regex(word)
|
|
|
|
|
cipher = self.match_ciphertext(regex)
|
|
|
|
|
print(word, regex, cipher, self.extract_unknown())
|
|
|
|
|
|
|
|
|
|
if cipher == "": continue
|
|
|
|
|
|
|
|
|
|
print(word, regex, cipher,
|
|
|
|
|
self.extract_unknown(word, regex, cipher))
|
|
|
|
|
|
|
|
|
|
return self.key_to_str()
|
|
|
|
|
|
|
|
|
|
|