o
    aĎiF4                     @  sr  d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
m
Z ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ erJdd	lmZ d
ZG dd deZeddd9ddZeddd:ddZ eddd:ddZ!eddd:ddZ"eddd:ddZ#G dd de	Z$eddd;d"d#Z%d<d(d)Z&		*d=d>d0d1Z'd?d3d4Z(d@d5d6Z)		*d=d>d7d8Z*d*S )Az
Grapheme cluster segmentation following Unicode Standard Annex #29.

This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
defined in UAX #29: Unicode Text Segmentation.

https://www.unicode.org/reports/tr29/
    )annotations)IntEnum)	lru_cache)TYPE_CHECKING
NamedTuple   )bisearch)
GRAPHEME_L
GRAPHEME_T
GRAPHEME_VGRAPHEME_LVINCB_EXTENDINCB_LINKERGRAPHEME_LVTINCB_CONSONANTGRAPHEME_EXTENDGRAPHEME_CONTROLGRAPHEME_PREPENDGRAPHEME_SPACINGMARKEXTENDED_PICTOGRAPHICGRAPHEME_REGIONAL_INDICATOR)Iterator    c                   @  sH   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdS )GCBz'Grapheme Cluster Break property values.r   r                        	   
            N)__name__
__module____qualname____doc__OTHERCRLFCONTROLEXTENDZWJREGIONAL_INDICATORPREPENDSPACING_MARKLVTLVLVT r8   r8   L/home/jeff/fluffinator/venv/lib/python3.10/site-packages/wcwidth/grapheme.pyr   ,   s     r   i   )maxsizeucsintreturnc                 C  s   | dkrt jS | dkrt jS | dkrt jS t| trt jS t| tr%t jS t| t	r-t j
S t| tr5t jS t| tr=t jS t| trEt jS t| trMt jS t| trUt jS t| tr]t jS t| tret jS t jS )z;Return the Grapheme_Cluster_Break property for a codepoint.r%   r"   i   )r   r+   r,   r/   	_bisearchr   r-   r   r.   r   r0   r   r1   r   r2   r	   r3   r   r4   r
   r5   r   r6   r   r7   r*   r;   r8   r8   r9   _grapheme_cluster_breakB   s6   









r@   boolc                 C     t t| tS )z6Check if codepoint has Extended_Pictographic property.)rA   r>   r   r?   r8   r8   r9   _is_extended_pictographice      rC   c                 C  rB   )z,Check if codepoint has InCB=Linker property.)rA   r>   r   r?   r8   r8   r9   _is_incb_linkerk   rD   rE   c                 C  rB   )z/Check if codepoint has InCB=Consonant property.)rA   r>   r   r?   r8   r8   r9   _is_incb_consonantq   rD   rF   c                 C  rB   )z,Check if codepoint has InCB=Extend property.)rA   r>   r   r?   r8   r8   r9   _is_incb_extendw   rD   rG   c                   @  s"   e Zd ZU dZded< ded< dS )BreakResultz*Result of grapheme cluster break decision.rA   should_breakr<   ri_countN)r&   r'   r(   r)   __annotations__r8   r8   r8   r9   rH   }   s   
 rH   prev_gcbcurr_gcbBreakResult | Nonec                 C  s&  | t jkr|t jkrtdddS | t jt jt jfv r tdddS |t jt jt jfv r0tdddS | t jkrG|t jt jt jt jfv rGtdddS | t jt jfv r]|t jt j	fv r]tdddS | t jt j	fv rp|t j	krptdddS |t j
kr{tdddS |t jkrtdddS | t jkrtdddS dS )z
    Check simple GCB-pair-based break rules (cacheable).

    Returns BreakResult for rules that can be determined from GCB properties alone, or None if
    complex lookback rules (GB9c, GB11) need to be checked.
    Fr   rI   rJ   TN)r   r+   r,   rH   r-   r3   r4   r6   r7   r5   r.   r2   r1   )rL   rM   r8   r8   r9   _simple_break_check   s&   	" 


rP   textstrcurr_idxrJ   c                 C  sn  t | |}|dur|S |tjkrtdddS t|| }t|rVd}|d }|dkrVt|| }	t|	r;d}|d8 }nt|	rD|d8 }nt|	rQ|rPtdddS nn|dks*| tjkrt|r|d }|dkrt|| }	t	|	}
|
tj
kr{|d8 }nt|	rtdddS n|dksg| tjkr|tjkr|d dkrtd|d dS tdddS |tjkrdnd}td|dS )z
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules.
    NFr   rO   r   Tr   )rP   r   r/   rH   ordrF   rE   rG   rC   r@   r.   r0   )rL   rM   rQ   rS   rJ   resultcurr_ucs
has_linkeriprev_ucs	prev_propr8   r8   r9   _should_break   sN   





r[   Nunistrstartend
int | NoneIterator[str]c           
      c  s    | sdS t | }|du r|}||ks||krdS t||}|}d}tt| | }|tjkr1d}t|d |D ]#}tt| | }t||| ||}	|	j}|	j	rY| || V  |}|}q8| || V  dS )aP  
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings.

    Example::

        >>> list(iter_graphemes('cafe\u0301'))
        ['c', 'a', 'f', 'e\u0301']
        >>> list(iter_graphemes('\U0001F468\u200D\U0001F469\u200D\U0001F467'))
        ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
        >>> list(iter_graphemes('\U0001F1FA\U0001F1F8'))
        ['o', 'k', '\U0001F1FA\U0001F1F8']

    .. versionadded:: 0.3.0
    Nr   r   )
lenminr@   rT   r   r0   ranger[   rJ   rI   )
r\   r]   r^   lengthcluster_startrJ   rL   idxrM   rU   r8   r8   r9   iter_graphemes   s.   

rg   posc                 C  sl  t | |d  }|dkr|dkr| |d  dkr|d S |dk rF|dkrB|dkrBt | |d  }|dkrBt|tjkrBt| |d S |d S |d }|dkr}|| tk r}t | | }d|  krddk rgn nnt|tjkron|d8 }|dkr}|| tk sT|}tt | | }|tjkrdnd}t|d |D ]}	tt | |	 }
t	||
| |	|}|j
}|jr|	}|
}q|S )a  
    Find the start of the grapheme cluster containing the character before pos.

    Scans backwards from pos to find a safe starting point, then iterates forward using standard
    break rules to find the actual cluster boundary.

    :param text: The Unicode string.
    :param pos: Position to search before (exclusive).
    :returns: Start position of the grapheme cluster.
    r   r"   r      r   r   )rT   r@   r   r1   _find_cluster_startMAX_GRAPHEME_SCANr-   r0   rc   r[   rJ   rI   )rQ   rh   	target_cpprev_cp
safe_startcpre   left_gcbrJ   rX   	right_gcbrU   r8   r8   r9   rk   <  s:    	rk   c                 C  s    |dkrdS t | t|t| S )a  
    Find the grapheme cluster boundary immediately before a position.

    :param unistr: The Unicode string to search.
    :param pos: Position in the string (0 < pos <= len(unistr)).
    :returns: Start index of the grapheme cluster containing the character at pos-1.

    Example::

        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
        6
        >>> grapheme_boundary_before('a\r\nb', 3)
        1

    .. versionadded:: 0.3.6
    r   )rk   rb   ra   )r\   rh   r8   r8   r9   grapheme_boundary_beforep  s   rs   c                 c  s    | sdS t | }|du r|nt||}t|d}||ks!||kr#dS |}||krCt| |}||k r4dS | || V  |}||ks)dS dS )a  
    Iterate over grapheme clusters in reverse order (last to first).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)).
    :yields: Grapheme cluster substrings in reverse order.

    Example::

        >>> list(iter_graphemes_reverse('cafe\u0301'))
        ['e\u0301', 'f', 'a', 'c']

    .. versionadded:: 0.3.6
    Nr   )ra   rb   maxrk   )r\   r]   r^   rd   rh   re   r8   r8   r9   iter_graphemes_reverse  s    

ru   )r;   r<   r=   r   )r;   r<   r=   rA   )rL   r   rM   r   r=   rN   )rL   r   rM   r   rQ   rR   rS   r<   rJ   r<   r=   rH   )r   N)r\   rR   r]   r<   r^   r_   r=   r`   )rQ   rR   rh   r<   r=   r<   )r\   rR   rh   r<   r=   r<   )+r)   
__future__r   enumr   	functoolsr   typingr   r   r   r>   table_graphemer	   r
   r   r   r   r   r   r   r   r   r   r   r   r   collections.abcr   rl   r   r@   rC   rE   rF   rG   rH   rP   r[   rg   rk   rs   ru   r8   r8   r8   r9   <module>   sB    	@"
0E
D
4