
    )!h|F                     >   d Z ddlZddlZddlmZ dgZ ej                  d      Z ej                  d      Z ej                  d      Z	 ej                  d      Z
 ej                  d	      Z ej                  d
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  dej"                        Z ej                  d      Z ej                  d      Z G d dej*                        Zy)zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                        e Zd ZdZdZdd fd
Z fdZd Zd Zd	Z	d
 Z
d Zd Zd Zd ZddZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z xZS )r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyleT)convert_charrefsc                P    t         |           || _        | j                          y)zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)super__init__r	   reset)selfr	   	__class__s     "/usr/lib/python3.12/html/parser.pyr   zHTMLParser.__init__W   s!     	 0

    c                 b    d| _         d| _        t        | _        d| _        t
        |           y)z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elemr   r   )r   r   s    r   r   zHTMLParser.reseta   s)    -r   c                 N    | j                   |z   | _         | j                  d       y)zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feedi   s     ||d*Qr   c                 &    | j                  d       y)zHandle any buffered data.   N)r   r   s    r   closezHTMLParser.closer   s    Qr   Nc                     | j                   S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr    s    r   get_starttag_textzHTMLParser.get_starttag_textx   s    ###r   c                     |j                         | _        t        j                  d| j                  z  t        j                        | _        y )Nz</\s*%s\s*>)lowerr   recompileIr   )r   elems     r   set_cdata_modezHTMLParser.set_cdata_mode|   s/    **,::nt&FMr   c                 (    t         | _        d | _        y N)r   r   r   r    s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                    | j                   }d}t        |      }||k  r-| j                  rq| j                  se|j	                  d|      }|dk  r|j                  dt        ||dz
              }|dk\  r't        j                  d      j                  ||      sn|}n?| j                  j                  ||      }|r|j                         }n| j                  rns|}||k  rJ| j                  r*| j                  s| j                  t        |||              n| j                  |||        | j                  ||      }||k(  rn	|j                  } |d|      r1t         j#                  ||      r| j%                  |      }	n |d|      r| j'                  |      }	nt |d|      r| j)                  |      }	nY |d|      r| j+                  |      }	n> |d	|      r| j-                  |      }	n#|d
z   |k  s|r| j                  d       |d
z   }	nn<|	dk  r`|sn2t         j#                  ||      rnB |d|      rK|dz   |k(  r| j                  d       nt.        j#                  ||      rn| j1                  ||dz   d         n |d|      rF|}dD ]'  }
|j3                  |
|dz         s|t        |
      z  } n | j1                  ||dz   |        n |d|      r| j5                  ||dz   d         n~|||dz    j7                         dk(  r| j9                  ||dz   d         nM |d	|      r| j1                  ||dz   d         n, |d|      r| j;                  ||dz   d         nt=        d      |}	| j                  ||	      }n |d|      rt>        j#                  ||      }|rY|jA                         dd }| jC                  |       |jE                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }d||d  v r,| j                  |||dz           | j                  ||dz         }n |d|      rtF        j#                  ||      }|rW|jA                  d
      }| jI                  |       |jE                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }tJ        j#                  ||      }|rE|rB|jA                         ||d  k(  r,|jE                         }	|	|k  r|}	| j                  ||d
z         }n>|d
z   |k  r'| j                  d       | j                  ||d
z         }nnJ d       ||k  r-|rm||k  rh| j                  s\| j                  r*| j                  s| j                  t        |||              n| j                  |||        | j                  ||      }||d  | _         y )Nr   <&"   z[\s;]</<!--<?<!r      )z--!z---   z	<![CDATA[   	   	<!doctypewe should not get here!z&#;zinteresting.search() lied)&r   lenr	   r   findrfindmaxr'   r(   searchr   starthandle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declaration
endtagopenhandle_commentendswithunknown_declr&   handle_decl	handle_piAssertionErrorcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rZ   r   injampposrJ   rH   ksuffixnames               r   r   zHTMLParser.goahead   s   ,,L!e$$T__LLa(q5 %]]3Aqt=F!JJx077HA((//;AA1u(($$Xgal%;<$$WQq\2q!$AAvu ++J#q!%%gq1++A.Aa())!,A***1-Aa(a(Aa(33A6A!eq[C$$S)AAq5#))'15#D!,q5A: ,,T2'--gq9  !//!>#FA.&8 &F&//!< !S[ 0 %& ++GAaCN;#K3))'!A#$-8 1Q3--/;>((17#D!,++GAaCDM:#D!,wqst}5,-FGGANN1a(D!$gq1 ;;=2.D''-		A%c1Q3/Eq!,Agabk)((1Q38 NN1ac2C#!3 ;;q>D))$/		A%c1Q3/Eq!,A"((!4u{{};!IIK6 !A NN1a!e4!eq[ $$S)q!a%0A555qw !ez 1q5$$T__  '!A,!78  1.q!$Aqr{r   c                 p   | j                   }|||dz    dk(  sJ d       |||dz    dk(  r| j                  |      S |||dz    dk(  r| j                  |      S |||dz    j                         d	k(  r7|j	                  d
|dz         }|dk(  ry| j                  ||dz   |        |dz   S | j                  |      S )Nr7   r6   z+unexpected call to parse_html_declaration()r9   r4   r:   z<![r;   r<   r   r>   r   )r   rM   parse_marked_sectionr&   rA   rT   parse_bogus_comment)r   r^   r   gtposs       r   rO   z!HTMLParser.parse_html_declaration  s    ,,q1~% 	D )C 	D%1QqS>V#%%a((Qqs^u$,,Q//Qqs^!!#{2LLac*E{WQqS/07N++A..r   c                     | j                   }|||dz    dv sJ d       |j                  d|dz         }|dk(  ry|r| j                  ||dz   |        |dz   S )Nr7   )r6   r3   z"unexpected call to parse_comment()r   r>   r   )r   rA   rQ   )r   r^   reportr   poss        r   rg   zHTMLParser.parse_bogus_comment(  su    ,,q1~- 	C 1B 	C-ll3!$"9!C 01Qwr   c                     | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }| j	                  ||dz   |        |j                         }|S )Nr7   r5   zunexpected call to parse_pi()r>   )r   picloserD   rE   rU   rZ   )r   r^   r   rJ   r`   s        r   rN   zHTMLParser.parse_pi4  st    ,,q1~%F'FF%w!,KKMwqsA'IIKr   c                 ~   d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j	                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _        }||k  rt        j	                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|rt        |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                  |||        |S |j                  d
      r| j                  ||       |S | j!                  ||       || j"                  v r| j%                  |       |S )Nr   r   z#unexpected call to parse_starttag()r7   r:   'r>   ")r   />rq   )r#   check_for_whole_start_tagr   tagfind_tolerantrJ   rZ   rX   r&   r   attrfind_tolerantr   appendstriprF   rR   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr+   )r   r^   endposr   attrsrJ   rb   tagmattrnamerest	attrvaluerZ   s                r   rK   zHTMLParser.parse_starttag@  s   #//2A:M,,&q0  &&w!4;;;uIIK"[[^1133s&j!''3A()1a(8%HdI 	2A$8)BC.82A#7237%aO	$Y/	LL(..*I67A &j a%%'k!WQv./M<<##C/
    e,d111##C(r   c                 H   | j                   }t        j                  ||      }|rt|j                         }|||dz    }|dk(  r|dz   S |dk(  r6|j	                  d|      r|dz   S |j	                  d|      ry||kD  r|S |dz   S |dk(  ry|dv ry||kD  r|S |dz   S t        d	      )
Nr   r   /rq   r7   r>   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZr=   )r   locatestarttagend_tolerantrJ   rZ   rH   rV   )r   r^   r   r}   r`   nexts         r   rr   z$HTMLParser.check_for_whole_start_tagl  s    ,,&,,Wa8A1QqS>Ds{1us{%%dA.q5L%%c1-q5Hq5Lrz 5 6 1u1u677r   c                    | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }t        j                  ||      }|s| j                  | j                  |||        |S t        j                  ||dz         }|s!|||dz    dk(  r|dz   S | j                  |      S |j                  d      j                         }|j                  d|j                               }| j                  |       |dz   S |j                  d      j                         }| j                  %|| j                  k7  r| j                  |||        |S | j                  |       | j                          |S )	Nr7   r3   zunexpected call to parse_endtagr   r>   r:   z</>r   )r   	endendtagrD   rZ   
endtagfindrJ   r   rF   rs   rg   rX   r&   rA   handle_endtagr.   )r   r^   r   rJ   rh   	namematchtagnamer*   s           r   rL   zHTMLParser.parse_endtag  sy   ,,q1~%H'HH%  !A#.		  !,*  5!12(..w!<I1QqS>U*Q3J33A66ooa(..0G
 LLimmo6Ew'7N{{1~##%??&t&  5!124 r   c                 J    | j                  ||       | j                  |       y r-   )rx   r   r   r|   r{   s      r   rw   zHTMLParser.handle_startendtag  s     S%(3r   c                      y r-    r   s      r   rx   zHTMLParser.handle_starttag      r   c                      y r-   r   )r   r|   s     r   r   zHTMLParser.handle_endtag  r   r   c                      y r-   r   r   rd   s     r   rY   zHTMLParser.handle_charref  r   r   c                      y r-   r   r   s     r   r\   zHTMLParser.handle_entityref  r   r   c                      y r-   r   r   s     r   rF   zHTMLParser.handle_data  r   r   c                      y r-   r   r   s     r   rQ   zHTMLParser.handle_comment  r   r   c                      y r-   r   )r   decls     r   rT   zHTMLParser.handle_decl  r   r   c                      y r-   r   r   s     r   rU   zHTMLParser.handle_pi  r   r   c                      y r-   r   r   s     r   rS   zHTMLParser.unknown_decl  r   r   )r   )__name__
__module____qualname____doc__ry   r   r   r   r!   r#   r$   r+   r.   r   rO   rg   rN   rK   rr   rL   rw   rx   r   rY   r\   rF   rQ   rT   rU   rS   __classcell__)r   s   @r   r   r   ?   s    * 1+/  O$NG#X/*		(X8D%P 
r   )r   r'   _markupbasehtmlr   __all__r(   r   r]   r[   rW   rI   rP   rm   commentclosers   rt   VERBOSEr   r   r   
ParserBaser   r   r   r   <module>r      s#   " 
   .  RZZ' RZZ%
BJJ>?	
"**@
Arzz+&RZZ%

"**S/rzz)$ 2::LM BJJ=>  (RZZ ) ZZ  BJJsO	 RZZ>?
\'' \r   