diff options
Diffstat (limited to 'bitbake/lib/bs4/formatter.py')
-rw-r--r-- | bitbake/lib/bs4/formatter.py | 185 |
1 files changed, 185 insertions, 0 deletions
from bs4.dammit import EntitySubstitution


class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents, as
                 well as some optimizations in the way tags are rendered.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    HTML = 'html'
    XML = 'xml'

    # Per-keyword defaults applied when the language is not XML.
    HTML_DEFAULTS = dict(
        cdata_containing_tags={"script", "style"},
    )

    def _default(self, language, value, kwarg):
        """Resolve the effective value of a constructor keyword argument.

        :param language: Formatter.XML or Formatter.HTML.
        :param value: The value the caller supplied, or None.
        :param kwarg: The name of the keyword argument, used as the
            key into HTML_DEFAULTS.
        :return: `value` if it is not None; otherwise an empty set for
            XML, or the HTML_DEFAULTS entry for any other language.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        return self.HTML_DEFAULTS[kwarg]

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
            empty_attributes_are_booleans=False, indent=1,
    ):
        r"""Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        :param empty_attributes_are_booleans: Render attributes whose value
            is the empty string as HTML-style boolean attributes.
            (Attributes whose value is None are always rendered this way.)

        :param indent: If indent is a non-negative integer or string,
            then the contents of elements will be indented
            appropriately when pretty-printing. An indent level of 0,
            negative, or "" will only insert newlines. Using a
            positive integer indent indents that many spaces per
            level. If indent is a string (such as "\t"), that string
            is used to indent each level. The default behavior is to
            indent one space per level.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )
        self.empty_attributes_are_booleans = empty_attributes_are_booleans

        # Normalize `indent` to the literal string emitted per level.
        if indent is None:
            indent = 0
        if isinstance(indent, int):
            # Negative values are treated as zero (newlines only).
            indent = ' ' * max(indent, 0)
        elif not isinstance(indent, str):
            # Anything that is neither int nor str falls back to one space.
            indent = ' '
        self.indent = indent

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level, as in the original.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Text inside a CDATA-containing tag is left untouched.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        If `empty_attributes_are_booleans` is True, then attributes whose
        values are set to the empty string will be treated as boolean
        attributes (their value is reported as None).

        :param tag: An object with an `attrs` mapping (or None).
        :return: A sorted list of (name, value) pairs; an empty list
           when `tag.attrs` is None.
        """
        if tag.attrs is None:
            return []
        blank_to_none = self.empty_attributes_are_booleans
        return sorted(
            (k, (None if blank_to_none and v == '' else v))
            for k, v in tag.attrs.items()
        )


class HTMLFormatter(Formatter):
    """A generic Formatter for HTML."""
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Create a Formatter whose language is fixed to Formatter.HTML.

        All other arguments are forwarded to Formatter.__init__.
        """
        super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)


class XMLFormatter(Formatter):
    """A generic Formatter for XML."""
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        """Create a Formatter whose language is fixed to Formatter.XML.

        All other arguments are forwarded to Formatter.__init__.
        """
        super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)


# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    void_element_close_prefix=None,
    empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Register a real XMLFormatter here, like the other XML entries, so that
# its `language` is Formatter.XML and it gets the XML defaults.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)