Fügt Dokumentation hinzu

breakmono2
Daniel Tschertkow 5 years ago
parent afb6faabbc
commit ed3d03d599

@ -2,22 +2,27 @@
""" """
Python module to derive a key from a monoalphabetically encrypted file. Python module to derive a key from a monoalphabetically encrypted file.
Does not work yet.
""" """
import re import re
from collections import Counter from collections import Counter
#from string import ascii_lowercase
import pdb
class Breaker(): class Breaker():
"""
A handle on the various bits of data needed to derive the key from the ciphertext.
"""
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
@staticmethod @staticmethod
def read_word_file(word_file): def read_word_file(word_file):
# excuse me """
Helper function to read the words file into memory. The rationale was that
querying would be faster and it would be possible to delete used words
without mutating the file.
"""
words = [] words = []
with open(word_file, 'r') as wf: with open(word_file, 'r') as wf:
for line in wf: for line in wf:
@ -32,6 +37,7 @@ class Breaker():
def __init__(self, ciphertext, word_file): def __init__(self, ciphertext, word_file):
# count chars in the ciphertext
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
self.text = ciphertext self.text = ciphertext
@ -39,12 +45,18 @@ class Breaker():
self.words = Breaker.read_word_file(word_file) self.words = Breaker.read_word_file(word_file)
def choose_word(self): def choose_word(self):
"""
Iterate through the word list and pick a word for pattern matching.
Words whose chars are all known are removed from the list.
Words where fewer than 30% of the chars are known are skipped.
"""
known_chars = self.key.keys() known_chars = self.key.keys()
for i in range(len(self.words)): for word in self.words:
word = self.words[i]
word = word + "" # copy
if len(word) == 0: if len(word) == 0:
self.words.pop(i) self.words.remove(word)
continue continue
# count known chars in word # count known chars in word
@ -55,15 +67,22 @@ class Breaker():
# remove known words # remove known words
if n == len(word): if n == len(word):
self.words.pop(i) self.words.remove(word)
continue continue
# skip words with too many unknown chars # skip words with too many unknown chars
if (n / len(word) < 0.3): continue if (n / len(word) < 0.3): continue
return self.words.pop(i) self.words.remove(word)
return word
return ""
def translate_and_regex(self, word: str): def translate_and_regex(self, word: str):
"""
Prepare chosen word for pattern matching.
Translate the known characters and replace the others with a regex '.'
"""
regex = "" regex = ""
for char in word: for char in word:
if char in self.key.keys(): if char in self.key.keys():
@ -73,6 +92,11 @@ class Breaker():
return regex return regex
def match_ciphertext(self, regex): def match_ciphertext(self, regex):
"""
Compile the translated chosen word to a regular expression and find all
matches inside the ciphertext. Count the occurrences and pick the most
frequent one.
"""
regexc = re.compile(regex) regexc = re.compile(regex)
count = Counter(regexc.findall(self.text)) count = Counter(regexc.findall(self.text))
if len(count) == 0: if len(count) == 0:
@ -81,6 +105,12 @@ class Breaker():
return count.most_common(1)[0][0] return count.most_common(1)[0][0]
def extract_unknown(self, plain, regex, cipher): def extract_unknown(self, plain, regex, cipher):
"""
Compare the chosen word in its various forms to infer which
new characters may be added to the alphabet map.
The dots inside `regex' symbolize the positions of unknown characters
and provide a mapping between `plain' and `cipher' text.
"""
assert len(plain) == len(regex) == len(cipher) assert len(plain) == len(regex) == len(cipher)
assert "." in regex assert "." in regex
@ -91,21 +121,50 @@ class Breaker():
return self.key return self.key
def key_to_str(self): def key_to_str(self):
return str(self.key.keys()) return "".join(self.key.keys())
def get_key(self): def get_key(self):
"""
Strings everything together.
Unfortunately, it does not work.
"""
while len(self.key.keys()) < 26: while len(self.key.keys()) < 26:
word = self.choose_word() word = self.choose_word()
if word == "": break # no more words with unknown chars
regex = self.translate_and_regex(word) regex = self.translate_and_regex(word)
cipher = self.match_ciphertext(regex) cipher = self.match_ciphertext(regex)
if cipher == "": continue if cipher == "": continue
self.extract_unknown(word, regex, cipher)
print(word, regex, cipher,
self.extract_unknown(word, regex, cipher))
return self.key_to_str() return self.key_to_str()
## end Breaker ## end Breaker
if __name__ == "__main__":
    import argparse
    import os
    import sys

    # When this file is run directly it cannot import from its parent
    # package unless PYTHONPATH or sys.path is adjusted, so the parent
    # directory is appended to sys.path before the import below.
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(parent_dir)

    from libex01 import read_text

    def parse_args(sys_argv):
        """Parse the command line; the only argument is the positional FILE."""
        cli = argparse.ArgumentParser()
        cli.add_argument("FILE")
        return cli.parse_args(sys_argv[1:])

    args = parse_args(sys.argv)
    ciphertext = read_text(args.FILE)
    # The word list path is relative, so it is resolved against the current
    # working directory when Breaker opens it.
    breaker = Breaker(ciphertext, "common.txt")
    print(breaker.get_key())

Loading…
Cancel
Save