Adds current state of break_mono.py
parent
98a78d3d35
commit
5355e25ba3
@ -0,0 +1,117 @@
"""
Python module to derive a key from a monoalphabetically encrypted file.
"""
import itertools as it
import re

from collections import OrderedDict
from collections import Counter
from collections.abc import Iterator
from string import ascii_lowercase

#from libex01 import read_text

def mono_break(enc_txt: str):

    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def get_frequency(text):
        freq = Counter(text)
        # Counter with lowercase ascii letters all having a count of 0
        missing = Counter(dict(it.product(ascii_lowercase, [0])))
        freq.update(missing)
        return freq

    def derive_alphabet(freq: Counter):
        # Rank the ciphertext letters by frequency before pairing them with
        # the English frequency order; Counter.keys() is insertion-ordered,
        # not frequency-ordered, so zipping the raw keys would scramble the
        # mapping.
        ranked = [char for char, _ in freq.most_common()]
        return OrderedDict(zip(ranked, EN_LETTER_FREQ))

    subs = derive_alphabet(get_frequency(enc_txt))
    dec = ""
    for char in enc_txt:
        dec += subs[char]
    return dec


## Frequency analysis
EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")


def get_frequency(text):
    freq = Counter(text)
    # Counter with lowercase ascii letters all having a count of 0
    missing = Counter(dict(it.product(ascii_lowercase, [0])))
    freq.update(missing)
    return freq


def derive_alphabet_freq(freq: Counter):
    # Map each plaintext letter (in English frequency order) to the equally
    # ranked ciphertext letter.
    most_freq = [item[0] for item in freq.most_common()]
    return OrderedDict(zip(EN_LETTER_FREQ, most_freq))
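
# Illustrative only (not part of the original snapshot): the key returned by
# derive_alphabet_freq reads plaintext -> ciphertext, so the most frequent
# ciphertext letter ends up as the value of 'e'. With a hypothetical toy
# alphabet this looks like:
#   >>> toy = Counter("aaabbc")
#   >>> list(zip("eta", [c for c, _ in toy.most_common()]))
#   [('e', 'a'), ('t', 'b'), ('a', 'c')]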


## Pattern search
def next_char_anchor(text: str, char):
    """
    Generator that takes the text and a char and yields the positions of that
    char as (index, char) tuples.
    Adjust the starting position by slicing the text before calling.
    The final (-1, char) pair is returned as the StopIteration value.
    """
    index = text.find(char)
    while index != -1:
        yield (index, char)
        index = text.find(char, index + 1)
    return (index, char)
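
# Illustrative only: consuming the generator for the anchor 'b' in "abcab"
# yields every position of that char:
#   >>> list(next_char_anchor("abcab", "b"))
#   [(1, 'b'), (4, 'b')]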


def match_word(text: str, word: str, pos_iter):
    """
    Align with an anchor and check the hypothesis.
    The first hypothesis is the frequency analysis.
    Align `word' with `text' at each anchor position.
    For the remaining anchors check the occurrence of the pattern
    against the word. If it matches more than once, save it as a
    new hypothesis.
    """
    pass
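
# A minimal sketch of the matching step described above; `_match_word_sketch'
# is hypothetical and not part of the original module. It assumes `pos_iter'
# yields (index, char) anchors (as produced by next_char_anchor) and collects
# every alignment of `word' whose repetition pattern is consistent with the
# ciphertext window at that position.
def _match_word_sketch(text: str, word: str, pos_iter):
    hypotheses = []
    for index, char in pos_iter:
        offset = word.find(char)  # align the anchor char inside the word
        start = index - offset
        if offset == -1 or start < 0 or start + len(word) > len(text):
            continue
        window = text[start:start + len(word)]
        # Consistent means equal word letters always sit on equal ciphertext
        # letters, and different word letters never share a ciphertext letter.
        mapping = {}
        consistent = all(mapping.setdefault(w, c) == c
                         for w, c in zip(word, window))
        if consistent and len(set(mapping.values())) == len(mapping):
            hypotheses.append((start, mapping))
    return hypotheses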


def next_match(text: str, word: str, alphabet: dict, pos_iter: Iterator):
    pass


class Breaker:

    EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz")

    def __init__(self, ciphertext, word_file):
        self.alph = derive_alphabet_freq(get_frequency(ciphertext))
        self.word_file = word_file  # TODO

    def get_key(self):

        def get_word_containing(char_list: list):
            """
            Find a word in the word list file (common.txt) containing the
            chars in `char_list'.
            Return None if no word matches, or
            TUPLE(word, pos) where `pos' is a LIST of matching positions.
            """
            with open(self.word_file, 'r') as f:
                for word in f:
                    pos = []
                    for char in char_list:
                        pos.append(word.find(char))
                    if -1 not in pos:
                        return word, pos
            return None

        most_freq = next(iter(self.alph))  # most frequent char
        word_having_char, pos = get_word_containing(most_freq)  # unpack

        pass
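
        # The remaining steps are not implemented in this snapshot; one
        # plausible continuation (an assumption, not the author's final
        # design) would be:
        #   1. locate the ciphertext letter self.alph[most_freq] in the
        #      ciphertext via next_char_anchor,
        #   2. align word_having_char at those anchors with match_word,
        #   3. fold confirmed alignments back into self.alph and repeat with
        #      the next most frequent letter until the key stabilises.
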
text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt"

freq = get_frequency(text)
# derive_alphabet only exists inside mono_break; the module-level helper
# is derive_alphabet_freq.
alph = derive_alphabet_freq(freq)

print(alph.values())
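
# Quick usage sketch (not part of the original snapshot): the same test
# ciphertext can be run through the pure frequency attack in mono_break;
# the output is only a first hypothesis and may still contain wrongly
# substituted letters.
print(mono_break(text)[:60])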