o
    mĎiC                     @  s  d Z ddlmZ ddlZddlZddlZddlmZm	Z	 er$ddl
mZ edZejdZejeZeje eejd< ed	e_ed
e_ede_ede_eje_edeje_edeje_edZG dd dejZ e e_G dd dejZ!dS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownz--!?>zhtml.parser
htmlparserz<[a-zA-Z]|</>z</[a-zA-Z]?z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                      s.   e Zd ZdZ fddZd	 fddZ  ZS )
_HTMLParserz"Handle special start and end tags.c                   s   | j ||d  }t|d }t|dk s+d|  krdks;n d|  kr*dks;n | | j ||d   |d S t |S )N   A   Z   a   z      )rawdataordlenhandle_datasuperparse_endtag)selfistartc	__class__ O/home/jeff/fluffinator/venv/lib/python3.10/site-packages/markdown/htmlparser.pyr   c   s   8z_HTMLParser.parse_endtagr   intreturnc                   sB   | j ||d  dkr| | j ||d   |d S t |S )Nr   </>)r   r   r   parse_starttagr   r   r   r   r   r    k   s   z_HTMLParser.parse_starttagr   r   r   r   )__name__
__module____qualname____doc__r   r    __classcell__r   r   r   r   r   `   s    r   c                      s"  e Zd ZU dZdC fddZ fddZ fdd	ZedDddZdEddZ	dFddZ
dGddZdHddZdIddZdJdd ZdHd!d"ZdKd$d%ZdKd&d'ZdId(d)ZdId*d+ZdId,d-ZdId.d/ZdL fd1d2ZdMd4d5ZdL fd6d7ZdNdO fd:d;Zd<Zd=ed>< dPd?d@ZdLdAdBZ  ZS )QHTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    mdr   c                   s@   d|vrd|d< t dg| _dg| _t j|i | || _d S )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cacher   __init__r)   )r   r)   argskwargsr   r   r   r/      s   
zHTMLExtractor.__init__c                   s4   d| _ d| _g | _g | _g | _dg| _t   dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr.   r   resetr   r   r   r   r7      s   zHTMLExtractor.resetc                   sv   t    t| jr | jr| js| t| j n| | j t| j	r9| j
| jjd| j	 g | _	dS dS )zHandle any buffered data. N)r   closer   r   r*   
cdata_elemr   r   unescaper5   r6   appendr)   	htmlStashstorejoinr8   r   r   r   r:      s   



zHTMLExtractor.closer   r   c                 C  sj   t t| jd | jd D ]}| j| }| jd|}|dkr$t| j}| j|d  q| j| jd  S )zHReturns char index in `self.rawdata` for the start of the current line.    
r	   )ranger   r.   linenor   findr=   )r   iilast_line_start_poslf_posr   r   r   line_offset   s   

zHTMLExtractor.line_offsetboolc                 C  s<   | j dkrdS | j dkrdS | j| j| j| j    dkS )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   Tr   Fr9   )offsetr   rI   stripr8   r   r   r   at_line_start   s
   

 zHTMLExtractor.at_line_starttagstrc                 C  s<   | j | j }tj| j|}|r| j||  S d|S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)rI   rK   r   	endendtagsearchr   endformat)r   rN   r   mr   r   r   get_endtag_text   s
   
zHTMLExtractor.get_endtag_textattrsSequence[tuple[str, str]]c                 C  s   || j v r| || d S | j|r&| js|  r&| js&d| _| jd | 	 }| jr;| j
| | j| d S | j| || jv rL|   d S d S )NTrB   )r-   handle_startendtagr)   is_block_levelr3   rM   r2   r6   r=   get_starttag_textr4   r5   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rN   rV   textr   r   r   handle_starttag   s   
 
zHTMLExtractor.handle_starttagc                 C  s   |  |}| jrc| j| || jv r!| jr!| j |krn| jst| jdkrat| j	| j
| j t| d  rA| jd nd| _d| _| j| jjd| j | jd g | _d S d S | j| d S )Nr   rB   TFr9   

)rU   r2   r5   r=   r4   popr   blank_line_rematchr   rI   rK   r3   r6   r)   r>   r?   r@   )r   rN   r]   r   r   r   handle_endtag   s$   

$
zHTMLExtractor.handle_endtagdatac                 C  s:   | j r
d|v r
d| _ | jr| j| d S | j| d S )NrB   F)r3   r2   r5   r=   r6   r   rd   r   r   r   r     s
   zHTMLExtractor.handle_datais_blockc                 C  s   | j s| jr| j| dS |  r[|r[t| j| j| j	 t
| d r+|d7 }nd| _| jr6| jd nd}|dsH|drH| jd | j| jj| | jd dS | j| dS )z Handle empty tags (`<data>`). NrB   Tr	   r9   r_   )r2   r3   r5   r=   rM   ra   rb   r   rI   rK   r   r6   endswithr)   r>   r?   )r   rd   rf   itemr   r   r   handle_empty_tag	  s   $
zHTMLExtractor.handle_empty_tagc                 C  s   | j |  | j|d d S )Nrf   )ri   rZ   r)   rY   )r   rN   rV   r   r   r   rX      s   z HTMLExtractor.handle_startendtagnamec                 C     | j d|dd d S )Nz&#{};Frj   ri   rS   r   rk   r   r   r   handle_charref#     zHTMLExtractor.handle_charrefc                 C  rl   )Nz&{};Frj   rm   rn   r   r   r   handle_entityref&  rp   zHTMLExtractor.handle_entityrefc                 C  rl   )Nz	<!--{}-->Trj   rm   re   r   r   r   handle_comment)  s   zHTMLExtractor.handle_commentc                 C  rl   )Nz<!{}>Trj   rm   re   r   r   r   handle_decl-  rp   zHTMLExtractor.handle_declc                 C  rl   )Nz<?{}?>Trj   rm   re   r   r   r   	handle_pi0  rp   zHTMLExtractor.handle_pic                 C  s,   | drdnd}| jd||dd d S )NzCDATA[z]]>z]>z<![{}{}Trj   )
startswithri   rS   )r   rd   rR   r   r   r   unknown_decl3  s   zHTMLExtractor.unknown_declr   c                   s,   |   s| jrt |S | d |d S )Nz<?r   )rM   r3   r   parse_pir   r!   r   r   r   rw   7  s   
zHTMLExtractor.parse_piTc                 C  sj   | j }|d|sJ dt||d }|s | d |d S |r1| }| ||d |  | S )Nz<!--z"unexpected call to parse_comment()   <rA   )r   ru   commentcloserQ   r   r   rr   rR   )r   r   reportr   rb   jr   r   r   parse_commentA  s   
zHTMLExtractor.parse_commentc                   s   |   s| jr>| j||d  dkr8| j||d  dks8| |}|dkr6| | j||d   |d S |S t |S | d |d S )	Nr   z<![	   z	<![CDATA[r	   rA   z<!r   )rM   r3   r   parse_bogus_commentr   r   parse_html_declaration)r   r   resultr   r   r   r   M  s   ,

z$HTMLExtractor.parse_html_declarationr   r{   c                   s6   t  ||}|dkrdS | j| j|| dd |S )Nr	   Frj   )r   r   ri   r   )r   r   r{   posr   r   r   r   ]  s
   z!HTMLExtractor.parse_bogus_commentNz
str | None_HTMLExtractor__starttag_textc                 C  s   | j S )z)Return full source of start tag: `<...>`.)r   r8   r   r   r   rZ   l  s   zHTMLExtractor.get_starttag_textc                 C  s@  | j ||d  dkr| | j ||d   |d S d | _| |}|dk r7| | j ||d   |d S | j }||| | _g }tj||d }|sRJ d| }|d	  | _
}||k rtj||}|snnT|ddd\}	}
}|
s}d }n-|d d d  kr|dd  ksn |d d d	  kr|dd  krn n|dd }|rt|}||		 |f | }||k sd|||  }|d
vr|  \}}d| jv r|| jd }t| j| jd }n|t| j }| |||  |S |dr| || |S || jv r| | | || |S )Nr   r   r   rA   z#unexpected call to parse_starttag()r   'r	   ")>/>rB   r   )r   r   r   check_for_whole_start_tagr   tagfind_tolerantrb   rR   grouplowerlasttagattrfind_tolerantr<   r=   rL   getposcountr   rfindrg   rX   r[   set_cdata_moder^   )r   r   endposr   rV   rb   krN   rT   attrnamerest	attrvaluerR   rD   rK   r   r   r   r    p  s`   
&(



zHTMLExtractor.parse_starttag)r)   r   )r   r   )r   rJ   )rN   rO   r   rO   )rN   rO   rV   rW   )rN   rO   )rd   rO   )rd   rO   rf   rJ   )rk   rO   r"   )T)r   )r   r   r{   r   r   r   )r   rO   )r#   r$   r%   r&   r/   r7   r:   propertyrI   rM   rU   r^   rc   r   ri   rX   ro   rq   rr   rs   rt   rv   rw   r}   r   r   r   __annotations__rZ   r    r'   r   r   r   r   r(   x   s6   
 















r(   )"r&   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   compilerz   util	find_specspecmodule_from_specr   loaderexec_modulemodulesstarttagopen
endtagopenpiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantlocatetagendra   
HTMLParserr   r(   r   r   r   r   <module>   s8   


