From 0004679a83e01ca3bb007f167a5eacf55d5f7f31 Mon Sep 17 00:00:00 2001 From: Eggert Jung Date: Wed, 25 Nov 2020 11:28:40 +0100 Subject: [PATCH] alternative version of break mono dooing something, poorly working --- src/mono/break_mono.py | 217 ++++++++++++------------------------------------- 1 file changed, 53 insertions(+), 164 deletions(-) mode change 100755 => 100644 src/mono/break_mono.py diff --git a/src/mono/break_mono.py b/src/mono/break_mono.py old mode 100755 new mode 100644 index 4bb44a1..fc92e4c --- a/src/mono/break_mono.py +++ b/src/mono/break_mono.py @@ -1,170 +1,59 @@ -#!/usr/bin/env python +from random import randint as rand +def randomize_key(key): + a_index = rand(0, 25) + b_index = rand(0, 25) + a = key[a_index] + b = key[b_index] + key = key[:a_index] + b + key[a_index + 1:] + key = key[:b_index] + a + key[b_index + 1:] -""" -Python module to derive a key from an monoalphabetically encrypted file. -Does not work yet. -""" + return key -import re -from collections import Counter - -class Breaker(): - """ - A handle on the various bits of data needed to derive the key from the ciphertext. - """ - - EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") - - @staticmethod - def read_word_file(word_file): - """ - Helper function to read the words file into memory. The rationale was that - querying would be faster and it would be possible to delete used words - without mutating the file. - """ - - words = [] - with open(word_file, 'r') as wf: - for line in wf: - word = line[:-1] # remove trailing newline - word = word.lower() - - if word == "" or not word.isalpha(): - continue - - words.append(word) - return words - - def __init__(self, ciphertext, word_file): - - # count chars in the ciphertext - most_freq_cipher = Counter(ciphertext).most_common(1)[0][0] - - self.text = ciphertext - self.key = { Breaker.EN_LETTER_FREQ[0] : most_freq_cipher } - self.words = Breaker.read_word_file(word_file) - - def choose_word(self): - """ - Iterate through the word list and pick a word for pattern matching. - Words with chars that are completely known are are being removed. - Words where less then a third of the chars are known are being skipped. - """ - known_chars = self.key.keys() - for word in self.words: - - word = word + "" # copy - - if len(word) == 0: - self.words.remove(word) - continue - - # count known chars in word - n = 0 - for char in word: - if char in known_chars: - n +=1 - - # remove known words - if n == len(word): - self.words.remove(word) - continue - - # skip words with too many unknown chars - if (n / len(word) < 0.3): continue - - self.words.remove(word) - return word - - return "" - - def translate_and_regex(self, word: str): - """ - Prepare chosen word for pattern matching. - Translate the known characters and replace the others with a regex '.' - """ - regex = "" - for char in word: - if char in self.key.keys(): - regex = regex + self.key[char] - else: - regex = regex + "." - return regex - - def match_ciphertext(self, regex): - """ - Compile the translated chosen word to a regular expression and find all - matches inside the ciphertext. Count the occurences and pick the most - frequent one. - """ - regexc = re.compile(regex) - count = Counter(regexc.findall(self.text)) - if len(count) == 0: - return "" - else: - return count.most_common(1)[0][0] - - def extract_unknown(self, plain, regex, cipher): - """ - Compare the the chosen words in it's various forms to infer which - new characters may be added to the alphabet map. - The dots inside `regex' symbolize the positions of unknown characters - and provide a mapping between `plain' and `cipher' text. - """ - assert len(plain) == len(regex) == len(cipher) - assert "." in regex - - for i in range(len(regex)): - if regex[i] != ".": continue - self.key.update({ plain[i] : cipher[i] }) - - return self.key - - def key_to_str(self): - return "".join(self.key.keys()) - - def get_key(self): - """ - Strings everything together. - Unfortunatly it does not work. - """ - while len(self.key.keys()) < 26: - - word = self.choose_word() - - if word == "": break # no more words with unknown chars - - regex = self.translate_and_regex(word) - cipher = self.match_ciphertext(regex) - - if cipher == "": continue - self.extract_unknown(word, regex, cipher) - - return self.key_to_str() - ## end Breaker if __name__ == "__main__": - import sys, os import argparse + parser = argparse.ArgumentParser() + parser.add_argument('FILE') + args = parser.parse_args() + + t = ''.join([x for x in open(args.FILE, "r").read().lower() if x.isalpha()]) + import os + words = open(os.path.abspath(os.path.dirname(__file__))+"/common.txt", "r").read().split('\n') + + occurance_string = 'etaoinsrhdlucmfywgpbvkxqjz' + + occur_count = {chr(x+0x61): 0 for x in range(26)} + for char in t: + occur_count[char]+=1 + mapping = {} + sorted_occur = sorted(occur_count, key=occur_count.__getitem__, reverse=True) + for i in range(26): + mapping[occurance_string[i]]=sorted_occur[i] + key = ''.join([x for _, x in sorted(mapping.items())]) + + import re + import mono + + best_score = 0 + best_key = key + + trys = 0 + while trys < 1000: + score = 0 + tmp = randomize_key(key) + plain = mono.mono_decrypt(t, tmp) + for word in words: + if re.search(word, plain): + score += 1 + if score > best_score: + trys = 0 + best_score = score + best_key = key + key=tmp + print(best_score) + else: + trys+=1 - # cannot import from a parent package if called directly - # without modifying PYTHONPATH or sys.path - file_dir = os.path.dirname(os.path.abspath(__file__)) - file_parent_dir = os.path.dirname(file_dir) - sys.path.append(file_parent_dir) - - from libex01 import read_text - - def parse_args(sys_argv): - parser = argparse.ArgumentParser() - parser.add_argument("FILE") - return parser.parse_args(sys_argv[1:]) - - - args = parse_args(sys.argv) - txt = read_text(args.FILE) - word_file = "common.txt" - - bm = Breaker(txt, word_file) - - print(bm.get_key()) + print(best_key) + print() + print(mono.mono_decrypt(t, best_key))