fügt diverse Bugfixes hinzu
This commit is contained in:
@@ -7,34 +7,45 @@ Python module to derive a key from a monoalphabetically encrypted file.
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
#from string import ascii_lowercase
|
||||||
|
|
||||||
|
import pdb
|
||||||
|
|
||||||
class Breaker():
|
class Breaker():
|
||||||
|
|
||||||
|
|
||||||
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def read_word_file(word_file):
|
||||||
|
# excuse me
|
||||||
|
words = []
|
||||||
|
with open(word_file, 'r') as wf:
|
||||||
|
for line in wf:
|
||||||
|
word = line[:-1] # remove trailing newline
|
||||||
|
word = word.lower()
|
||||||
|
|
||||||
|
if word == "" or not word.isalpha():
|
||||||
|
continue
|
||||||
|
|
||||||
|
words.append(word)
|
||||||
|
return words
|
||||||
|
|
||||||
def __init__(self, ciphertext, word_file):
|
def __init__(self, ciphertext, word_file):
|
||||||
|
|
||||||
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
|
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
|
||||||
|
|
||||||
# excuse me:
|
|
||||||
words = []
|
|
||||||
with open(word_file, 'r') as wf:
|
|
||||||
for line in wf:
|
|
||||||
words.append(line[:-1]) # remove trailing newline and append
|
|
||||||
|
|
||||||
self.text = ciphertext
|
self.text = ciphertext
|
||||||
self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
|
self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher }
|
||||||
self.words = words
|
self.words = Breaker.read_word_file(word_file)
|
||||||
|
|
||||||
def choose_word(self):
|
def choose_word(self):
|
||||||
known_chars = self.key.keys()
|
known_chars = self.key.keys()
|
||||||
for i in range(len(self.words)):
|
for i in range(len(self.words)):
|
||||||
word = self.words[i]
|
word = self.words[i]
|
||||||
|
|
||||||
if len(word) == 0: continue
|
if len(word) == 0:
|
||||||
|
self.words.pop(i)
|
||||||
|
continue
|
||||||
|
|
||||||
# count known chars in word
|
# count known chars in word
|
||||||
n = 0
|
n = 0
|
||||||
@@ -42,11 +53,15 @@ class Breaker():
|
|||||||
if char in known_chars:
|
if char in known_chars:
|
||||||
n +=1
|
n +=1
|
||||||
|
|
||||||
# skip known words or words with too many unknown
|
# remove known words
|
||||||
if n == len(word) or n / len(word) < 0.3:
|
if n == len(word):
|
||||||
|
self.words.pop(i)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return self.words[i]
|
# skip words with too many unknown chars
|
||||||
|
if (n / len(word) < 0.3): continue
|
||||||
|
|
||||||
|
return self.words.pop(i)
|
||||||
|
|
||||||
def translate_and_regex(self, word: str):
|
def translate_and_regex(self, word: str):
|
||||||
regex = ""
|
regex = ""
|
||||||
@@ -58,12 +73,16 @@ class Breaker():
|
|||||||
return regex
|
return regex
|
||||||
|
|
||||||
def match_ciphertext(self, regex):
|
def match_ciphertext(self, regex):
|
||||||
rx = re.compile(regex)
|
regexc = re.compile(regex)
|
||||||
count = Counter(re.findall(self.text))
|
count = Counter(regexc.findall(self.text))
|
||||||
return count.most_common(1)[0][0]
|
if len(count) == 0:
|
||||||
|
return ""
|
||||||
|
else:
|
||||||
|
return count.most_common(1)[0][0]
|
||||||
|
|
||||||
def extract_unknown(self, plain, regex, cipher):
|
def extract_unknown(self, plain, regex, cipher):
|
||||||
assert len(plain) == len(regex) == len(cipher)
|
assert len(plain) == len(regex) == len(cipher)
|
||||||
|
assert "." in regex
|
||||||
|
|
||||||
for i in range(len(regex)):
|
for i in range(len(regex)):
|
||||||
if regex[i] != ".": continue
|
if regex[i] != ".": continue
|
||||||
@@ -81,7 +100,11 @@ class Breaker():
|
|||||||
word = self.choose_word()
|
word = self.choose_word()
|
||||||
regex = self.translate_and_regex(word)
|
regex = self.translate_and_regex(word)
|
||||||
cipher = self.match_ciphertext(regex)
|
cipher = self.match_ciphertext(regex)
|
||||||
print(word, regex, cipher, self.extract_unknown())
|
|
||||||
|
if cipher == "": continue
|
||||||
|
|
||||||
|
print(word, regex, cipher,
|
||||||
|
self.extract_unknown(word, regex, cipher))
|
||||||
|
|
||||||
return self.key_to_str()
|
return self.key_to_str()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user