Fügt Dokumentation hinzu
This commit is contained in:
91
src/mono/break_mono.py
Normal file → Executable file
91
src/mono/break_mono.py
Normal file → Executable file
@@ -2,22 +2,27 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
Python module to derive a key from a monoalphabetically encrypted file.
|
Python module to derive a key from a monoalphabetically encrypted file.
|
||||||
|
Does not work yet.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
#from string import ascii_lowercase
|
|
||||||
|
|
||||||
import pdb
|
|
||||||
|
|
||||||
class Breaker():
|
class Breaker():
|
||||||
|
"""
|
||||||
|
A handle on the various bits of data needed to derive the key from the ciphertext.
|
||||||
|
"""
|
||||||
|
|
||||||
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_word_file(word_file):
|
def read_word_file(word_file):
|
||||||
# excuse me
|
"""
|
||||||
|
Helper function to read the words file into memory. The rationale was that
|
||||||
|
querying would be faster and it would be possible to delete used words
|
||||||
|
without mutating the file.
|
||||||
|
"""
|
||||||
|
|
||||||
words = []
|
words = []
|
||||||
with open(word_file, 'r') as wf:
|
with open(word_file, 'r') as wf:
|
||||||
for line in wf:
|
for line in wf:
|
||||||
@@ -32,6 +37,7 @@ class Breaker():
|
|||||||
|
|
||||||
def __init__(self, ciphertext, word_file):
|
def __init__(self, ciphertext, word_file):
|
||||||
|
|
||||||
|
# count chars in the ciphertext
|
||||||
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
|
most_freq_cipher = Counter(ciphertext).most_common(1)[0][0]
|
||||||
|
|
||||||
self.text = ciphertext
|
self.text = ciphertext
|
||||||
@@ -39,12 +45,18 @@ class Breaker():
|
|||||||
self.words = Breaker.read_word_file(word_file)
|
self.words = Breaker.read_word_file(word_file)
|
||||||
|
|
||||||
def choose_word(self):
|
def choose_word(self):
|
||||||
|
"""
|
||||||
|
Iterate through the word list and pick a word for pattern matching.
|
||||||
|
Words with chars that are completely known are being removed.
|
||||||
|
Words where less than a third of the chars are known are being skipped.
|
||||||
|
"""
|
||||||
known_chars = self.key.keys()
|
known_chars = self.key.keys()
|
||||||
for i in range(len(self.words)):
|
for word in self.words:
|
||||||
word = self.words[i]
|
|
||||||
|
word = word + "" # copy
|
||||||
|
|
||||||
if len(word) == 0:
|
if len(word) == 0:
|
||||||
self.words.pop(i)
|
self.words.remove(word)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# count known chars in word
|
# count known chars in word
|
||||||
@@ -55,15 +67,22 @@ class Breaker():
|
|||||||
|
|
||||||
# remove known words
|
# remove known words
|
||||||
if n == len(word):
|
if n == len(word):
|
||||||
self.words.pop(i)
|
self.words.remove(word)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# skip words with too many unknown chars
|
# skip words with too many unknown chars
|
||||||
if (n / len(word) < 0.3): continue
|
if (n / len(word) < 0.3): continue
|
||||||
|
|
||||||
return self.words.pop(i)
|
self.words.remove(word)
|
||||||
|
return word
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
def translate_and_regex(self, word: str):
|
def translate_and_regex(self, word: str):
|
||||||
|
"""
|
||||||
|
Prepare chosen word for pattern matching.
|
||||||
|
Translate the known characters and replace the others with a regex '.'
|
||||||
|
"""
|
||||||
regex = ""
|
regex = ""
|
||||||
for char in word:
|
for char in word:
|
||||||
if char in self.key.keys():
|
if char in self.key.keys():
|
||||||
@@ -73,6 +92,11 @@ class Breaker():
|
|||||||
return regex
|
return regex
|
||||||
|
|
||||||
def match_ciphertext(self, regex):
|
def match_ciphertext(self, regex):
|
||||||
|
"""
|
||||||
|
Compile the translated chosen word to a regular expression and find all
|
||||||
|
matches inside the ciphertext. Count the occurrences and pick the most
|
||||||
|
frequent one.
|
||||||
|
"""
|
||||||
regexc = re.compile(regex)
|
regexc = re.compile(regex)
|
||||||
count = Counter(regexc.findall(self.text))
|
count = Counter(regexc.findall(self.text))
|
||||||
if len(count) == 0:
|
if len(count) == 0:
|
||||||
@@ -81,6 +105,12 @@ class Breaker():
|
|||||||
return count.most_common(1)[0][0]
|
return count.most_common(1)[0][0]
|
||||||
|
|
||||||
def extract_unknown(self, plain, regex, cipher):
|
def extract_unknown(self, plain, regex, cipher):
|
||||||
|
"""
|
||||||
|
Compare the chosen word in its various forms to infer which
|
||||||
|
new characters may be added to the alphabet map.
|
||||||
|
The dots inside `regex' symbolize the positions of unknown characters
|
||||||
|
and provide a mapping between `plain' and `cipher' text.
|
||||||
|
"""
|
||||||
assert len(plain) == len(regex) == len(cipher)
|
assert len(plain) == len(regex) == len(cipher)
|
||||||
assert "." in regex
|
assert "." in regex
|
||||||
|
|
||||||
@@ -91,21 +121,50 @@ class Breaker():
|
|||||||
return self.key
|
return self.key
|
||||||
|
|
||||||
def key_to_str(self):
|
def key_to_str(self):
|
||||||
return str(self.key.keys())
|
return "".join(self.key.keys())
|
||||||
|
|
||||||
def get_key(self):
|
def get_key(self):
|
||||||
|
"""
|
||||||
|
Strings everything together.
|
||||||
|
Unfortunately it does not work.
|
||||||
|
"""
|
||||||
while len(self.key.keys()) < 26:
|
while len(self.key.keys()) < 26:
|
||||||
|
|
||||||
word = self.choose_word()
|
word = self.choose_word()
|
||||||
|
|
||||||
|
if word == "": break # no more words with unknown chars
|
||||||
|
|
||||||
regex = self.translate_and_regex(word)
|
regex = self.translate_and_regex(word)
|
||||||
cipher = self.match_ciphertext(regex)
|
cipher = self.match_ciphertext(regex)
|
||||||
|
|
||||||
if cipher == "": continue
|
if cipher == "": continue
|
||||||
|
self.extract_unknown(word, regex, cipher)
|
||||||
print(word, regex, cipher,
|
|
||||||
self.extract_unknown(word, regex, cipher))
|
|
||||||
|
|
||||||
return self.key_to_str()
|
return self.key_to_str()
|
||||||
|
|
||||||
## end Breaker
|
## end Breaker
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys, os
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# cannot import from a parent package if called directly
|
||||||
|
# without modifying PYTHONPATH or sys.path
|
||||||
|
file_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
file_parent_dir = os.path.dirname(file_dir)
|
||||||
|
sys.path.append(file_parent_dir)
|
||||||
|
|
||||||
|
from libex01 import read_text
|
||||||
|
|
||||||
|
def parse_args(sys_argv):
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("FILE")
|
||||||
|
return parser.parse_args(sys_argv[1:])
|
||||||
|
|
||||||
|
|
||||||
|
args = parse_args(sys.argv)
|
||||||
|
txt = read_text(args.FILE)
|
||||||
|
word_file = "common.txt"
|
||||||
|
|
||||||
|
bm = Breaker(txt, word_file)
|
||||||
|
|
||||||
|
print(bm.get_key())
|
||||||
|
|||||||
Reference in New Issue
Block a user