|  |  | @ -8,12 +8,13 @@ from collections import OrderedDict | 
			
		
	
		
		
			
				
					
					|  |  |  | from collections import Counter |  |  |  | from collections import Counter | 
			
		
	
		
		
			
				
					
					|  |  |  | from string import ascii_lowercase |  |  |  | from string import ascii_lowercase | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | #from libex01 import read_text |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | def mono_break(enc_txt: str): |  |  |  | class Breaker(): | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ## frequency analysis | 
			
		
	
		
		
			
				
					
					|  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") |  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     @staticmethod | 
			
		
	
		
		
			
				
					
					|  |  |  |     def get_frequency(text): |  |  |  |     def get_frequency(text): | 
			
		
	
		
		
			
				
					
					|  |  |  |         freq = Counter(text) |  |  |  |         freq = Counter(text) | 
			
		
	
		
		
			
				
					
					|  |  |  |         # Counter with lowercase ascii letters all having a count of 0 |  |  |  |         # Counter with lowercase ascii letters all having a count of 0 | 
			
		
	
	
		
		
			
				
					|  |  | @ -21,76 +22,24 @@ def mono_break(enc_txt: str): | 
			
		
	
		
		
			
				
					
					|  |  |  |         freq.update(missing) |  |  |  |         freq.update(missing) | 
			
		
	
		
		
			
				
					
					|  |  |  |         return freq |  |  |  |         return freq | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     def derive_alphabet(freq: Counter): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         return OrderedDict(zip(list(freq.keys()), EN_LETTER_FREQ)) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     subs = derive_alphabet(get_frequency(enc_txt)) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     dec = "" |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     for char in enc_txt: |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         dec += subs[char] |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     return dec |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | ## Frequenzanalyse |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | def get_frequency(text): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     freq = Counter(text) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     # Counter with lowercase ascii letters all having a count of 0 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     missing = Counter(dict(it.product(ascii_lowercase, [0]))) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     freq.update(missing) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     return freq |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | def derive_alphabet_freq(freq: Counter): |  |  |  |     @staticmethod | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def derive_alphabet_freq(freq: Counter): | 
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq = [ item[0] for item in freq.most_common() ] |  |  |  |         most_freq = [ item[0] for item in freq.most_common() ] | 
			
		
	
		
		
			
				
					
					|  |  |  |         #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) |  |  |  |         #return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) | 
			
		
	
		
		
			
				
					
					|  |  |  |         return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) |  |  |  |         return OrderedDict(zip(EN_LETTER_FREQ, most_freq)) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | ## Mustersuche |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | def next_char_anchor(text: str, char): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     """ |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     Generator that takes the text and a char and yields positions of that char. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     Adjust starting position by slicing. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     Return generator closure. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     """ |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     index = text.find(char) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     while index != -1: |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         yield (index, char) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         index = text.find(char, index + 1) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     return (index, char) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | def match_word(text: str, word: str, pos_iter): |  |  |  |     ## pattern matching | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     """ |  |  |  |     @staticmethod | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     Align with anchor and check hypothesis. |  |  |  |     def get_word_containing(word_file, char_list: list): | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     First hypothesis is the frequency analysis. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     Align `word' with `text' for each anchor. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     For remaining anchors check the occurence of pattern |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     and match with word. If it is true more than once, save it as a |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     new hypothesis. |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     """ |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     pass |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | def next_match(text: str, word: str, alphabet: dict, pos_iter: generator): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     pass |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | class Breaker(): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     EN_LETTER_FREQ = list("etaoinsrhdlucmfywgpbvkxqjz") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     def __init__(self, ciphertext, word_file): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.alph = derive_alphabet_freq(get_frequency(ciphertext)) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         self.word_file = None  # TODO |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     def get_key(self): |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         def get_word_containing(char_list: list): |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         """ |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |         Find word from a word list file (common.txt) containing the chars |  |  |  |         Find word from a word list file (common.txt) containing the chars | 
			
		
	
		
		
			
				
					
					|  |  |  |         in `char_list'. |  |  |  |         in `char_list'. | 
			
		
	
		
		
			
				
					
					|  |  |  |         Return None it no word matches or |  |  |  |         Return None it no word matches or | 
			
		
	
		
		
			
				
					
					|  |  |  |         TUPLE(word, pos) where `pos' is a LIST of matching positions. |  |  |  |         TUPLE(word, pos) where `pos' is a LIST of matching positions. | 
			
		
	
		
		
			
				
					
					|  |  |  |         """ |  |  |  |         """ | 
			
		
	
		
		
			
				
					
					|  |  |  |             with open(self.word_file, 'r') as f: |  |  |  |         with open(word_file, 'r') as f: | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |             for word in f: |  |  |  |             for word in f: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 pos = [] |  |  |  |                 pos = [] | 
			
		
	
		
		
			
				
					
					|  |  |  |                 for char in char_list: |  |  |  |                 for char in char_list: | 
			
		
	
	
		
		
			
				
					|  |  | @ -99,10 +48,15 @@ class Breaker(): | 
			
		
	
		
		
			
				
					
					|  |  |  |                     return word, pos |  |  |  |                     return word, pos | 
			
		
	
		
		
			
				
					
					|  |  |  |         return None |  |  |  |         return None | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         most_freq = next(iter(self.alph))  # most frequent char |  |  |  | 
 | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |         word_having_char, pos = get_word_containing(most_freq)  # unpack |  |  |  |     def __init__(self, ciphertext, word_file): | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         self.alph = derive_alphabet_freq(get_frequency(ciphertext)) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         self.word_file = None  # TODO | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     def get_key(self): | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         most_freq = next(iter(self.alph))  # most frequent char | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         word_having_char, pos = get_word_containing(self.word_file, most_freq) | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         pass |  |  |  |         pass | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | 
 |