3
Ug=                 @   s  d dl mZmZmZ d dlmZ d dlZd dlmZm	Z	 ddl
mZmZmZ ddl
mZmZmZ ddlmZmZ d d	lmZ d
jed Zejde d Zejde d Zi ZeddkZxeej D ]p\Z Z!eree!dkse ree!dkrqe!dkree!dkrej"e!Z!ne#e!Z!e!eks4e j$ re ee!< qW dd Z%ede% dddZ&G dd de'Z(G dd de)Z*dS )    )absolute_importdivisionunicode_literals)	text_typeN)register_errorxmlcharrefreplace_errors   )voidElementsbooleanAttributesspaceCharacters)rcdataElementsentitiesxmlEntities)treewalkers_utils)escape z"'=<>`[]u_    	
 /`  ᠎᠏               　]u   􏿿   &c       
      C   s"  t | ttfrg }g }d}xt| j| j| j D ]n\}}|rFd}q4|| j }tj| j|t	| j|d g rtj
| j||d  }d}nt|}|j| q4W x^|D ]V}tj|}	|	r|jd |j|	 |	jds|jd q|jdt|dd    qW dj|| jfS t| S d S )NFr   Tr   ;z&#x%s;r   )
isinstanceUnicodeEncodeErrorUnicodeTranslateError	enumerateobjectstartendr   isSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr   )
excres
codepointsskipicindex	codepointcpe r3   C/tmp/pip-unpacked-wheel-0ht26j5g/pip/_vendor/html5lib/serializer.pyhtmlentityreplace_errors*   s0     
"




r5   htmlentityreplaceetreec             K   s$   t j|}tf |}|j|| |S )a  Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    )r   getTreeWalkerHTMLSerializerrender)inputtreeencodingserializer_optswalkersr3   r3   r4   	serializeK   s    

rA   c               @   s~   e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZd!Zdd Zdd Zdd Zd"ddZd#ddZd$dd ZdS )%r9   legacy"TFquote_attr_values
quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrsescape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec          	   K   sz   t |t | j }t|dkr2tdtt| d|kr@d| _x(| jD ]}t| ||j|t	| | qHW g | _
d| _dS )aB
  Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        r   z2__init__() got an unexpected keyword argument '%s'rE   FN)	frozensetoptionslen	TypeErrornextiterrF   setattrr%   getattrerrorsstrict)selfkwargsunexpected_argsattrr3   r3   r4   __init__   s    OzHTMLSerializer.__init__c             C   s*   t |tst| jr"|j| jdS |S d S )Nr6   )r   r   AssertionErrorr=   encode)r\   stringr3   r3   r4   rb      s    zHTMLSerializer.encodec             C   s*   t |tst| jr"|j| jdS |S d S )Nr[   )r   r   ra   r=   rb   )r\   rc   r3   r3   r4   encodeStrict   s    zHTMLSerializer.encodeStrictNc             c   s  || _ d}g | _|r0| jr0ddlm} |||}| jrJddlm} ||}| jrdddlm} ||}| j	r~ddl
m} ||}| jrddlm} ||}xR|D ]H}|d }|dkr`d|d  }|d r|d	|d  7 }n|d
 r|d7 }|d
 rJ|d
 jddkr0|d
 jddkr*| jd d}nd}|d||d
 |f 7 }|d7 }| j|V  q|d5kr|dksz|r|r|d jddkr| jd | j|d V  n| jt|d V  q|d6kr|d }	| jd|	 V  |	tkr| j rd}n|r| jd x|d j D ]\\}
}}|}|}| jdV  | j|V  | j s|tj|	t kr"|tjdt kr"| jdV  | jdkst|dkrd}n@| jd krtj|d k	}n$| jd!krtj|d k	}ntd"|jd#d$}| j r|jd%d&}|r| j!}| j"rTd|kr<d|kr<d}nd|krTd|krTd}|dkrl|jdd'}n|jdd(}| j|V  | j|V  | j|V  n| j|V  q"W |	t#kr| j$r| j%r| jd)V  n| jd*V  | jdV  q|d+kr6|d }	|	tkrd}n|r$| jd | jd,|	 V  q|d-krx|d }|jd.dkrb| jd/ | jd0|d  V  q|d1kr|d }	|	d2 }|t&kr| jd3|	  | j'r|t(krt&| }nd4|	 }| j|V  q| j|d  qW d S )7NFr   )FiltertypeDoctypez<!DOCTYPE %snamepublicIdz PUBLIC "%s"systemIdz SYSTEMrC   r   'zBSystem identifier contains both single and double quote charactersz %s%s%s>
CharactersSpaceCharactersdataz</zUnexpected </ in CDATAStartTagEmptyTagz<%sTz+Unexpected child element of a CDATA element r   =alwaysspecrB   z?quote_attr_values must be one of: 'always', 'spec', or 'legacy'r   z&amp;<z&lt;z&#39;z&quot;z //EndTagz</%s>Commentz--zComment contains --z	<!--%s-->Entityr   zEntity %s not recognizedz&%s;)rm   rn   )rp   rq   ))r=   rZ   rO   filters.inject_meta_charsetre   rN   filters.alphabeticalattributesrP   filters.whitespacerQ   filters.sanitizerrG   filters.optionaltagsfindserializeErrorrd   rb   r   r   rL   itemsrH   r
   r%   tuplerD   rT   _quoteAttributeSpecsearch_quoteAttributeLegacy
ValueErrorreplacerK   rE   rF   r	   rI   rJ   r   rM   r   )r\   
treewalkerr=   in_cdatare   tokenrf   doctyperE   rh   _	attr_name
attr_valuekv
quote_attrro   keyr3   r3   r4   rA      s    


















zHTMLSerializer.serializec             C   s2   |rdj t| j||S dj t| j|S dS )an  Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

            r   N)r(   listrA   )r\   r   r=   r3   r3   r4   r:   w  s    zHTMLSerializer.renderXXX ERROR MESSAGE NEEDEDc             C   s   | j j| | jrtd S )N)rZ   r#   r[   SerializeError)r\   ro   r3   r3   r4   r     s    zHTMLSerializer.serializeError)rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   )N)N)r   )__name__
__module____qualname__rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rS   r`   rb   rd   rA   r:   r   r3   r3   r3   r4   r9   h   s6        Y
 

r9   c               @   s   e Zd ZdZdS )r   zError in serialized treeN)r   r   r   __doc__r3   r3   r3   r4   r     s   r   )r7   N)+
__future__r   r   r   pip._vendor.sixr   recodecsr   r   	constantsr	   r
   r   r   r   r   r   r   r   xml.sax.saxutilsr   r(   _quoteAttributeSpecCharscompiler   r   r$   rT   _is_ucs4r   r   r   r   r!   r"   islowerr5   rA   r   r9   	Exceptionr   r3   r3   r3   r4   <module>   s<   
	

  1