From 35bf5998564eda288ca4ee86e0fa45b28c64d7ba Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Tue, 20 Sep 2022 07:51:46 +0200 Subject: [PATCH] utils/web: Add <br>
to the list of block elements It should always be replaced with a space. --- src/utils/web.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/web.py b/src/utils/web.py index bde24e1e1..4dfaf8bd9 100644 --- a/src/utils/web.py +++ b/src/utils/web.py @@ -226,6 +226,8 @@ def getEncoding(s): # From beautifulsoup (version 4.10.0, bs4/builder/__init__.py, line 391) _block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) +_block_elements.update({"br"}) + class HtmlToText(HTMLParser, object): """Taken from some eff-bot code on c.l.p.""" entitydefs = entitydefs.copy()