|  |  | @ -1,3 +1,5 @@ | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | #!/usr/bin/env python | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | """ |  |  |  | """ | 
			
		
	
		
		
			
				
					
					|  |  |  | Python module to derive a key from an monoalphabetically encrypted file. |  |  |  | Python module to derive a key from an monoalphabetically encrypted file. | 
			
		
	
		
		
			
				
					
					|  |  |  | """ |  |  |  | """ | 
			
		
	
	
		
		
			
				
					|  |  | @ -27,7 +29,7 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |     def derive_alphabet_freq(freq: Counter): |  |  |  |     def derive_alphabet_freq(freq: Counter): | 
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq = [ item[0] for item in freq.most_common() ] |  |  |  |         most_freq = [ item[0] for item in freq.most_common() ] | 
			
		
	
		
		
			
				
					
					|  |  |  |         #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) |  |  |  |         #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) | 
			
		
	
		
		
			
				
					
					|  |  |  |         return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) |  |  |  |         return OrderedDict(zip(Breaker.EN_LETTER_FREQ, most_freq)) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     ## pattern matching |  |  |  |     ## pattern matching | 
			
		
	
	
		
		
			
				
					|  |  | @ -40,32 +42,65 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |         TUPLE(word, pos) where `pos' is a LIST of matching positions. |  |  |  |         TUPLE(word, pos) where `pos' is a LIST of matching positions. | 
			
		
	
		
		
			
				
					
					|  |  |  |         """ |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         with open(word_file, 'r') as f: |  |  |  |         with open(word_file, 'r') as f: | 
			
		
	
		
		
			
				
					
					|  |  |  |             for word in f: |  |  |  |             for line in f: | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 word = line[:-1] | 
			
		
	
		
		
			
				
					
					|  |  |  |                 pos = [] |  |  |  |                 pos = [] | 
			
		
	
		
		
			
				
					
					|  |  |  |                 for char in char_list: |  |  |  |                 for char in char_list: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     pos += word.find(char) |  |  |  |                     pos.append(word.find(char)) | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |                 if -1 not in pos: |  |  |  |                 if -1 not in pos: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     return word, pos |  |  |  |                     return word, pos | 
			
		
	
		
		
			
				
					
					|  |  |  |         return None |  |  |  |         return None, None | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     @staticmethod | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def positions(text: str, sub): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         index = text.find(sub) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         while index != -1: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             yield index | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             index = text.find(sub, index + 1) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         return index | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     @staticmethod | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def match_ciphertext(text: str, word_pos: tuple, char: tuple): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         word, wposl = word_pos | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         wpos = wposl[0] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         wlen = len(word) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         snip_count = Counter() | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         for pos in Breaker.positions(text, char): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             word_begin = pos - wpos | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             snippet = text[word_begin : word_begin + wlen] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             if snippet not in snip_count.elements(): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 snip_count[snippet] = text.count(snippet) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         return snip_count.most_common(1)[0][0] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def __init__(self, ciphertext, word_file): |  |  |  |     def __init__(self, ciphertext, word_file): | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.alph = derive_alphabet_freq(get_frequency(ciphertext)) |  |  |  |         self.ciphertext = ciphertext | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         self.word_file = None  # TODO |  |  |  |         self.alph = self.derive_alphabet_freq(self.get_frequency(ciphertext)) | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         self.word_file = word_file | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def get_key(self): |  |  |  |     def get_key(self): | 
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq = next(iter(self.alph))  # most frequent char |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         word_having_char, pos = get_word_containing(self.word_file, most_freq) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         pass |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | text = "gryticdettpjcjgtmtntajgryticdkrqstmkojgjgtmtrmjgtatnrgdpmatmjgcdnrpmhgoqmatpetopzsoqqtmfcajgtoaeatrmrpmjgtfozdatnmcpjqotjgtinltrscfgdwrpmontrntedjktmckgrjktkrpjrpmktjgopskgrjktlqtrntgryticdqoytmjgttultaotphtgryticdkojptnntmjgtlqrzdtltclqtwrsopzereotnncwtjowtnxdnjjctnhrltopjgonqrpmcfhcwltjojocpjgthcwlrnnocponzcptitjktozpcatjgtpttmirpmktsttlldngopzcpktsttlldngopzcpjgononxdnjrldpsachsncpzkaojjtpfcajgtltclqtkgchrpnttncwtjgopznkacpzqostrpjnoprhcqcpiktmccdangratedjjgtatnncwrpicjgtafdhsopopnthjncdjjgtatrpmjgononxdnjrldpsachsncpzqostkcastanoprfrhjcaiktmccdangratedjjgtatnncwrpicjgtafdhsopacecjncdjjgtatgryticdyonojtmjgtbdrzwoatgryticdnkrwopjgtngojjgtlrajihcpytpjocpnrpmjgtatrqlcqojosjgtfrhtnrqkrinmofftatpjjgtagtjcaohjgtnrwtedjktnkrqqckojrpmktnttpcjgopzhgrpztpcjgopzgrnhgrpztmjtpwoqqocpmcqqrancprqcnopzhrwlrozpjktpjiwoqqocpnjrayopzrpmkaojgopzoplropeoznjacpzltclqtdpkoqqopzjczoytnwrqqopyonocprpmltanlthjoytcptopfoytsomnetqckjgtlcytajiqoptcptlcldqrjocpadppopcdjcfjowt" |  |  |  |         # most frequent char in English and corresponding most common char in text | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         #most_freq = self.alph.popitem(last=False) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         most_freq = next(iter(self.alph)) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         word_pos = Breaker.get_word_containing( | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             self.word_file, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             #most_freq[0] | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             most_freq | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         most_common = Breaker.match_ciphertext( | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             self.ciphertext, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             word_pos, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             most_freq | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         print("most_freq", most_freq, "word_pos:", word_pos, "most_common:", most_common) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | freq = get_frequency(text) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | alph = derive_alphabet(freq) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | print(alph.values()) |  |  |  |     ## end Breaker | 
			
				
				
			
		
	
		
		
	
	
		
		
			
				
					|  |  | 
 |