
    -_g(                     .    d dl Z d dlmZ  G d de      Zy)    N)SoSMapc                        e Zd ZdZg dZddgZdZdZdZdZ	dZ
i Zi Zd Z fd	Zd
 Zd Z fdZd Zd Zd Zd Z xZS )SoSHostnameMapaL  Mapping store for hostnames and domain names

    Hostnames are obfuscated using an incrementing counter based on the total
    number of hosts matched regardless of domain name.

    Domain names are obfuscated based on the host's hostname, plus any user
    defined domains passed in by the `--domains` options.

    Domains are obfuscated as whole units, meaning the domains 'example.com'
    and 'host.foo.example.com' will be separately obfuscated with no relation
    for example as 'obfuscatedomdain1.com' and 'obfuscatedomain2.com'.

    Top-level domains are left untouched.
    )	localhostz.*localdomain.*z^com..*wwwapi)
z.yamlz.ymlz.crtz.keyz.pemz.logz.repoz.rulesz.confz.cfgTr   c                    | j                   j                         D ](  \  }}t        |j                  d            dk(  r/| j                   |   | j                  |j                  d      d   <   S|j                  d      r"|j                  d      d   | j                  |<   dj                  |j                  d      dd       }|s| j                   j                         D ]`  \  }}dj                  |j                  d      dd       }||k(  s/dj                  |j                  d      dd       }|| j                  |<   b + | j                          y)zBecause we use 'intermediary' dicts for host names and domain names
        in this parser, we need to re-inject entries from the map_file into
        these dicts and not just the underlying 'dataset' dict
        .   r   obfuscateddomainN)	datasetitemslensplithosts
startswith_domainsjoinset_initial_counts)selfdomainob_pair_domain_to_injectexisting_domainvalue	_existing
_ob_domains           C/usr/lib/python3/dist-packages/sos/cleaner/mappings/hostname_map.pyload_domains_from_mapz$SoSHostnameMap.load_domains_from_map5   s=   
  $||113 	FOFG6<<$%*37<<3G

6<<,Q/0%%&89,3MM#,>q,ADMM&) %(HHV\\#->q-D$E!(.2ll.@.@.B F*OU #)>)>s)CCR)H II $55%(XXekk#.>s.C%D
;E&78	F%	F. 	!    c                 N    d|v r|j                  dd      }t        | 	  |      S )zOverride the base get_regex_result() to provide a regex that, if
        this is an FQDN or a straight domain, will include an underscore
        formatted regex as well.
        r
   z(\.|_))replacesuperget_regex_result)r   item	__class__s     r   r%   zSoSHostnameMap.get_regex_resultS   s,    
 $;<<Y/Dw'--r!   c                    	 t        | j                  j                         d      d   j                  d      d   }t	        |      dz   | _        	 t        | j                  j                         d      d   j                  d      }t	        |d   j                  d      d         dz   | _        y# t        $ r Y lw xY w# t        $ r Y yw xY w)	zSet the initial counter for host and domain obfuscation numbers
        based on what is already present in the mapping.
        T)reverser   hostr   r   r
   N)	sortedr   valuesr   int
host_count
IndexErrorr   domain_count)r   hds      r   r   z!SoSHostnameMap.set_initial_counts\   s    
	tzz((*D9!<BB6J1MA!!fqjDO	t}}++-t<Q?EEhOA #AaDJJsOA$6 7! ;D  		  		s%   AB0 AB? 0	B<;B<?	C
Cc                    || j                   v ry|j                  d      }dj                  |j                  d      dd       t        |      dk(  r|d   | j                  v S t        fd| j                   D              ryy)zkCheck if a potential domain is in one of the domains we've loaded
        and should be obfuscated
        Tr
   r   r   r   c              3   @   K   | ]  }j                  |        y wN)endswith).0_dno_tlds     r   	<genexpr>z?SoSHostnameMap.domain_name_in_loaded_domains.<locals>.<genexpr>{   s     ;rvr";s   F)r   r   r   r   r   any)r   r   r*   r9   s      @r   domain_name_in_loaded_domainsz,SoSHostnameMap.domain_name_in_loaded_domainsp   sv     T]]"||C &,,s+Ab12t9>7djj((;T]];;r!   c                 b   d}d}d }|j                  d      r||d   z  }|dd  }|j                  d      r|j                  d      r||d   z  }|dd }|j                  d      r|| j                  v r| j                  |   S | j                  |j	                               sdj                  |||g      S |j                  | j                        r.d|j                  d      d   z   }|j                  |d      }||z  }|| j                  vrt        | j                  j                         dt              D ]  }d	}|j                  |      }|j                  d      }	|	d   r|	d   | j                  v rd}t        |      dk(  s|d   sS|s6|d   j                  d      s|j                  |      rt        | 9  |      } nE|j                  |d         s	 |j                  |d         d   }
|d   t        | 9  |
      z   } n |st        | 9  |      }||z   |z   S # t        $ r Y w xY w)
N )r
   _r   r   r   r
   T)r)   keyF)r   r6   r   r<   lowerr   
strip_extsr   r#   r+   keysr   r   r$   get	Exception)r   r&   prefixsuffixfinalextr   _host_substr_test_hitmr'   s              r   rD   zSoSHostnameMap.get   s:    ooj)d1gF8D ooj) mmJ'd2hF":D mmJ' 4<<<<%%11$**,? 77FD&122==)

3++C<<R(DcMFt||# $DLL$5$5$7(+- 	$

9-__S) a5RUdjj0#'Lu:?%(#q):):3)?)-y)A!GK-E::eAh'"jjq215 %a57;s+; ;/8 GK%E~&& % s   ,H""	H.-H.c                 J   |j                  d      }t        |      dk(  r"| j                  |d   j                               S t        |      dk(  r5| j	                  |      }t        d |D              r|j                         }|S t        |      dkD  r|d   }|dd  }t        |      dkD  r | j                  |j                               }nd}| j	                  |      }|| j                  |<   dj                  ||g      }t        d |D              r|j                         }|S y )Nr
   r   r      c              3   <   K   | ]  }|j                           y wr5   isupperr7   r1   s     r   r:   z/SoSHostnameMap.sanitize_item.<locals>.<genexpr>        -1199;-   unknownc              3   <   K   | ]  }|j                           y wr5   rQ   rS   s     r   r:   z/SoSHostnameMap.sanitize_item.<locals>.<genexpr>   rT   rU   )	r   r   sanitize_short_namerA   sanitize_domainallupperr   r   )	r   r&   r*   dnamehostnamer   ob_hostname	ob_domain_fqdns	            r   sanitize_itemzSoSHostnameMap.sanitize_item   s	   zz#t9>++DGMMO<<t9>((.E---Lt9q=AwH!"XF8}q "66x~~7GH (,,V4I!*DLLHHk956E---Lr!   c                    |r|| j                   v r|S || j                  vrSd| j                   }|| j                  |<   | xj                  dz  c_        || j                  |<   | j	                  |       | j                  |   S )zObfuscate the short name of the host with an incremented counter
        based on the total number of obfuscated host names
        r*   r   )	skip_keysr   r.   r   add_regex_item)r   r]   ob_hosts      r   rX   z"SoSHostnameMap.sanitize_short_name   s{     8t~~5O4<<'T__-.G#*DJJx OOq O%,DLL")||H%%r!   c                    | j                   D ]:  }t        j                  |dj                  |            s)dj                  |      c S  |d   j	                         }dj                  |dd       j	                         }| j                  |      }dj                  ||g      }|| j                  dj                  |      <   |S )zeObfuscate the domainname, broken out into subdomains. Top-level
        domains are ignored.
        r
   r   r   )ignore_matchesrematchr   rA   _new_obfuscated_domainr   )r   r   _skip
top_domainr\   r_   s         r   rY   zSoSHostnameMap.sanitize_domain   s     (( 	(Exxsxx/0xx''	( BZ%%'
"&,,.//6	HHi45	)2SXXf%&r!   c                     || j                   vr1d| j                   | j                   |<   | xj                  dz  c_        | j                   |   S )zDGenerate an obfuscated domain for each subdomain name given
        r   r   )r   r0   )r   r\   s     r   rj   z%SoSHostnameMap._new_obfuscated_domain   sL     %%5d6G6G5H#IDMM% "}}U##r!   )__name__
__module____qualname____doc__rg   rc   rB   ignore_short_itemsmatch_full_words_onlyr.   r0   r   r   r    r%   r   r<   rD   ra   rX   rY   rj   __classcell__)r'   s   @r   r   r      sv    N 	I
-J  JLHE"<.( ;'z@&$r!   r   )rh   sos.cleaner.mappingsr   r    r!   r   <module>rw      s    
 'p$V p$r!   