|
|
|
|
@ -1,3 +1,5 @@
|
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
Python module to derive a key from a monoalphabetically encrypted file.
|
|
|
|
|
"""
|
|
|
|
|
@ -27,7 +29,7 @@ class Breaker():
|
|
|
|
|
def derive_alphabet_freq(freq: Counter):
    """Build a substitution alphabet from a ciphertext frequency count.

    Pairs the letters of ``Breaker.EN_LETTER_FREQ`` (presumably the English
    letters in descending frequency order -- TODO confirm against the class
    definition) with the ciphertext characters sorted by descending frequency.

    :param freq: Counter of character frequencies in the ciphertext.
    :return: OrderedDict mapping expected English letter -> ciphertext char.
    """
    # Ciphertext characters, most frequent first.
    most_freq = [item[0] for item in freq.most_common()]
    # Must be class-qualified: EN_LETTER_FREQ is a class attribute, and an
    # unqualified reference raises NameError at call time.
    return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## pattern matching
|
|
|
|
|
@ -40,32 +42,65 @@ class Breaker():
|
|
|
|
|
def get_word_containing(word_file, char_list):
    """Find the first word in `word_file' containing every char of `char_list'.

    NOTE(review): the signature is reconstructed from the call sites
    (``Breaker.get_word_containing(self.word_file, most_freq)``) -- confirm.

    :param word_file: path to a newline-separated word list.
    :param char_list: iterable of characters that must all appear in the word.
    :return: TUPLE(word, pos) where `pos' is a LIST of matching positions
             (one index per char of `char_list'), or (None, None) when no
             word matches.
    """
    with open(word_file, 'r') as f:
        for line in f:
            # rstrip('\n') instead of line[:-1]: does not chop the last
            # letter when the final line has no trailing newline.
            word = line.rstrip('\n')
            # str.find returns -1 for a missing char, so a fully matching
            # word is one whose position list contains no -1.
            pos = [word.find(char) for char in char_list]
            if -1 not in pos:
                return word, pos
    # Keep the tuple contract even on failure so callers can always unpack.
    return None, None
|
|
|
|
|
|
|
|
|
|
@staticmethod
def positions(text: str, sub):
    """Generate every index at which `sub' occurs in `text'.

    Each search restarts one character after the previous hit, so
    overlapping occurrences are reported as well.

    :param text: string to scan.
    :param sub: substring (or single character) to locate.
    """
    index = text.find(sub)
    while index != -1:
        yield index
        index = text.find(sub, index + 1)
    # The old trailing `return index' was dead weight: in a generator it
    # only sets StopIteration.value (always -1 here) and no caller reads it.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
def match_ciphertext(text: str, word_pos: tuple, char: str):
    """Guess which ciphertext snippet corresponds to a known plaintext word.

    For every occurrence of `char' in `text', cuts out a snippet of the
    word's length aligned so that `char' sits at the word's first matching
    position, then counts how often each snippet occurs in the ciphertext.

    :param text: the ciphertext.
    :param word_pos: (word, positions) pair as returned by
                     get_word_containing.
    :param char: the ciphertext character assumed to map into the word.
    :return: the most frequent candidate snippet, or None if no candidate.
    """
    word, wposl = word_pos
    wpos = wposl[0]     # offset of `char' inside the plaintext word
    wlen = len(word)

    snip_count = Counter()
    for pos in Breaker.positions(text, char):
        word_begin = pos - wpos
        if word_begin < 0:
            # An occurrence too close to the start would produce a
            # negative slice index that wraps around and yields a
            # bogus snippet -- skip it.
            continue
        snippet = text[word_begin : word_begin + wlen]
        # O(1) key membership; the old `in snip_count.elements()' scanned
        # every counted occurrence linearly.
        if snippet not in snip_count:
            snip_count[snippet] = text.count(snippet)

    if not snip_count:
        return None     # no occurrence of `char' yielded a usable snippet
    return snip_count.most_common(1)[0][0]
|
|
|
|
|
|
|
|
|
|
def __init__(self, ciphertext, word_file):
    """Store the ciphertext and precompute the frequency-based alphabet.

    :param ciphertext: the monoalphabetically encrypted text.
    :param word_file: path to a newline-separated word list.
    """
    self.ciphertext = ciphertext
    # NOTE(review): derive_alphabet_freq/get_frequency take a single
    # argument, so they are presumably staticmethods on Breaker -- confirm.
    self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext))
    self.word_file = word_file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_key(self):
    """Derive the substitution key from the stored ciphertext.

    Work in progress: currently determines the most frequent ciphertext
    character, looks up a dictionary word containing it, finds the most
    common matching ciphertext snippet, and prints the intermediate
    results for inspection.
    """
    # most frequent char in English and corresponding most common char in text
    most_freq = next(iter(self.alph))
    word_pos = Breaker.get_word_containing(
        self.word_file,
        most_freq
    )
    most_common = Breaker.match_ciphertext(
        self.ciphertext,
        word_pos,
        most_freq
    )

    print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common)
|
|
|
|
|
## end Breaker
|
|
|
|
|
|