πŸ› οΈToolsShed

HTML Tag Stripper

Remove HTML tags and extract plain text content from HTML markup.

57 characters · 5 lines

Frequently Asked Questions

Code Implementation

import re
import html

# Comments are removed first because their text may itself contain ">",
# which would otherwise end the generic tag match too early and leak the
# remainder of the comment into the output.
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
# <script> and <style> elements are dropped together with their bodies:
# the JavaScript/CSS between the tags is not text content.
_HTML_RAW_BLOCK_RE = re.compile(
    r"<(script|style)(?=[\s/>])[^>]*>.*?</\1\s*>",
    re.IGNORECASE | re.DOTALL,
)
# Anything else that looks like a tag: "<", one or more non-">" chars, ">".
_HTML_TAG_RE = re.compile(r"<[^>]+>")


def strip_html_tags(text: str, decode_entities: bool = True) -> str:
    """Remove HTML markup from *text* and return the remaining plain text.

    Processing order:

    1. HTML comments (``<!-- ... -->``) are removed.
    2. ``<script>`` and ``<style>`` elements are removed including their
       contents (matched case-insensitively).
    3. All remaining ``<...>`` tags are removed.
    4. If *decode_entities* is true, character references such as
       ``&amp;`` or ``&lt;`` are decoded with ``html.unescape``.
    5. Leading and trailing whitespace is stripped.

    Entities are decoded only after tag removal so that escaped markup
    like ``&lt;b&gt;`` survives as the literal text ``<b>`` instead of
    being stripped as a tag.

    Args:
        text: The HTML markup to process.
        decode_entities: Whether to decode HTML character references in
            the result. Defaults to ``True``.

    Returns:
        The text with markup removed and surrounding whitespace stripped.

    Note:
        This is a regex-based stripper, not an HTML parser or a
        sanitizer; it must not be relied on to make untrusted markup
        safe for re-insertion into a page.
    """
    without_comments = _HTML_COMMENT_RE.sub("", text)
    without_raw_blocks = _HTML_RAW_BLOCK_RE.sub("", without_comments)
    clean = _HTML_TAG_RE.sub("", without_raw_blocks)
    if decode_entities:
        clean = html.unescape(clean)
    return clean.strip()

# Example usage: the same markup, with and without entity decoding.
html_text = "<p>Hello, <b>World</b>! &amp; welcome to &lt;Python&gt;.</p>"

decoded_text = strip_html_tags(html_text)
print(decoded_text)
# Output: Hello, World! & welcome to <Python>.

undecoded_text = strip_html_tags(html_text, decode_entities=False)
print(undecoded_text)
# Output: Hello, World! &amp; welcome to &lt;Python&gt;.

Comments & Feedback

Comments are powered by Giscus. Sign in with GitHub to leave a comment.