parent
30768b4592
commit
0004679a83
@ -1,170 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
from random import randint as rand
|
||||||
|
def randomize_key(key):
    """
    Return a copy of `key` in which two randomly chosen positions are swapped.

    Both positions are drawn independently from the whole key, so they may
    coincide; in that case the key comes back unchanged. The positions are
    derived from the length of `key` rather than assuming 26 letters, and an
    empty key is returned as is.
    """
    if not key:
        return key

    last_index = len(key) - 1
    a_index = rand(0, last_index)
    b_index = rand(0, last_index)

    # strings are immutable: swap inside a list and join it back together
    chars = list(key)
    chars[a_index], chars[b_index] = chars[b_index], chars[a_index]
    return "".join(chars)


# NOTE(review): the text below appears to be the module docstring of the
# Breaker-based implementation that is interleaved with the function above
# in this listing.
"""
Python module to derive a key from a monoalphabetically encrypted file.
Does not work yet.
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from collections import Counter
|
|
||||||
|
|
||||||
class Breaker():
    """
    A handle on the various bits of data needed to derive the key from the
    ciphertext.

    `key` maps plaintext letters to the ciphertext characters they are assumed
    to be encrypted as. It starts out with a single frequency based guess and
    is extended by matching dictionary words against the ciphertext.
    """

    # English letters, ordered from most to least frequent
    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    @staticmethod
    def read_word_file(word_file):
        """
        Helper function to read the words file into memory. The rationale was
        that querying would be faster and it would be possible to delete used
        words without mutating the file.

        Returns the lowercased words in file order; lines that are empty or
        contain anything but letters are left out.
        """
        words = []
        with open(word_file, 'r') as wf:
            for line in wf:
                # Strip only the line terminator. Cutting off the last char
                # unconditionally would truncate a final line that does not
                # end with a newline.
                word = line.rstrip("\r\n").lower()

                # "".isalpha() is False, so empty lines are dropped here too
                if word.isalpha():
                    words.append(word)
        return words

    def __init__(self, ciphertext, word_file):
        """
        Store the ciphertext, load the word list and seed the key.

        The most frequent ciphertext character is taken as the encryption of
        the most frequent English letter. An empty ciphertext gives an empty
        initial key instead of an IndexError.
        """
        # count chars in the ciphertext
        most_common = Counter(ciphertext).most_common(1)

        self.text = ciphertext
        if most_common:
            self.key = {Breaker.EN_LETTER_FREQ[0]: most_common[0][0]}
        else:
            self.key = {}
        self.words = Breaker.read_word_file(word_file)

    def choose_word(self):
        """
        Iterate through the word list and pick a word for pattern matching.
        Words whose chars are all known (and empty words) are removed.
        Words where less than 30% of the chars are known are skipped and stay
        in the list for a later call.

        The chosen word is removed from the list and returned; "" is returned
        when no word qualifies.
        """
        known_chars = self.key.keys()
        skipped = []

        # The list is never modified while it is being walked; removing
        # entries mid-iteration makes the loop jump over their successors.
        for index, word in enumerate(self.words):
            known = sum(1 for char in word if char in known_chars)

            # nothing left to learn from this word (also true for "")
            if known == len(word):
                continue

            # too many unknown chars, keep the word for later
            if known / len(word) < 0.3:
                skipped.append(word)
                continue

            # update in place so other references to the list stay valid
            self.words[:] = skipped + self.words[index + 1:]
            return word

        self.words[:] = skipped
        return ""

    def translate_and_regex(self, word: str):
        """
        Prepare chosen word for pattern matching.
        Translate the known characters and replace the others with a regex '.'

        The result has exactly one character per character of `word`.
        """
        # NOTE(review): translated characters are inserted unescaped; a key
        # value that is a regex metacharacter would change the pattern.
        return "".join(self.key.get(char, ".") for char in word)

    def match_ciphertext(self, regex):
        """
        Compile the translated chosen word to a regular expression and find
        all matches inside the ciphertext. Count the occurrences and pick the
        most frequent one. Returns "" when nothing matches.
        """
        matches = Counter(re.compile(regex).findall(self.text))
        if not matches:
            return ""
        return matches.most_common(1)[0][0]

    def extract_unknown(self, plain, regex, cipher):
        """
        Compare the chosen word in its various forms to infer which new
        characters may be added to the alphabet map.
        The dots inside `regex' symbolize the positions of unknown characters
        and provide a mapping between `plain' and `cipher' text.

        Returns the updated key dict.
        """
        assert len(plain) == len(regex) == len(cipher)
        assert "." in regex

        for plain_char, pattern_char, cipher_char in zip(plain, regex, cipher):
            if pattern_char == ".":
                self.key[plain_char] = cipher_char

        return self.key

    def key_to_str(self):
        """
        Join the keys of the alphabet map, in insertion order, into a string.
        """
        # NOTE(review): these are the plaintext letters, not the ciphertext
        # characters they map to -- confirm which one callers expect.
        return "".join(self.key.keys())

    def get_key(self):
        """
        Strings everything together: pick a word, match it against the
        ciphertext and learn new letters until 26 letters are known or the
        word list has nothing usable left.
        Unfortunately it does not work.
        """
        while len(self.key) < 26:

            word = self.choose_word()

            if word == "":
                break  # no more words with unknown chars

            regex = self.translate_and_regex(word)
            cipher = self.match_ciphertext(regex)

            if cipher == "":
                continue
            self.extract_unknown(word, regex, cipher)

        return self.key_to_str()
## end Breaker
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import sys
    import os
    import argparse
    import mono

    # Hill-climbing search for the key: start from a pure letter-frequency
    # guess, swap two key positions at random and keep the swap whenever the
    # decryption contains more words from common.txt than the best so far.
    parser = argparse.ArgumentParser()
    parser.add_argument('FILE')
    args = parser.parse_args()

    # Keep only a-z. Other alphabetic characters (e.g. umlauts) have no slot
    # in the frequency table below and would raise a KeyError there.
    with open(args.FILE, "r") as cipher_file:
        t = ''.join([x for x in cipher_file.read().lower() if 'a' <= x <= 'z'])

    # The word list lives next to this script. Empty entries are dropped
    # because they are contained in every text.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(script_dir, "common.txt"), "r") as words_file:
        words = [w for w in words_file.read().split('\n') if w]

    occurance_string = 'etaoinsrhdlucmfywgpbvkxqjz'

    # count every letter, including those that never occur
    occur_count = {chr(x+0x61): 0 for x in range(26)}
    for char in t:
        occur_count[char] += 1

    # pair the n-th most frequent English letter with the n-th most frequent
    # ciphertext letter; the key lists the ciphertext letters for a..z
    sorted_occur = sorted(occur_count, key=occur_count.__getitem__, reverse=True)
    mapping = dict(zip(occurance_string, sorted_occur))
    key = ''.join([x for _, x in sorted(mapping.items())])

    best_score = 0
    best_key = key

    # give up after 1000 consecutive swaps without improvement
    trys = 0
    while trys < 1000:
        tmp = randomize_key(key)
        plain = mono.mono_decrypt(t, tmp)

        # plain substring test: the words are literal text, not patterns
        score = sum(1 for word in words if word in plain)

        if score > best_score:
            trys = 0
            best_score = score
            # remember the candidate that reached this score; storing `key`
            # here would keep the key from before the improvement
            best_key = tmp
            key = tmp
            print(best_score)
        else:
            trys += 1

    print(best_key)
    print()
    print(mono.mono_decrypt(t, best_key))

    # NOTE(review): the statements below appear to belong to the Breaker-based
    # entry point that is interleaved with the search above in this listing.

    # cannot import from a parent package if called directly
    # without modifying PYTHONPATH or sys.path
    file_dir = os.path.dirname(os.path.abspath(__file__))
    file_parent_dir = os.path.dirname(file_dir)
    sys.path.append(file_parent_dir)

    from libex01 import read_text
|
|
||||||
def parse_args(sys_argv):
    """
    Parse a complete argv list.

    The first element (the program name) is ignored. The rest must hold one
    positional argument, which is available as `FILE` on the returned
    namespace.
    """
    cli_arguments = sys_argv[1:]

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("FILE")

    return arg_parser.parse_args(cli_arguments)
|
|
||||||
|
|
||||||
|
|
||||||
# Breaker-based entry point: read the ciphertext named on the command line
# and print whatever key the Breaker derives from it.
args = parse_args(sys.argv)
txt = read_text(args.FILE)

# Look for the word list next to this script instead of in the current
# working directory, so the script can be started from anywhere.
word_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "common.txt")

bm = Breaker(txt, word_file)

print(bm.get_key())
|
|
||||||
|
|||||||
Loading…
Reference in New Issue