HTML Tag Stripper
Remove HTML tags and extract plain text content from HTML markup.
57 characters · 5 lines
Frequently Asked Questions
Code Implementation
import re
import html
# Matches, in priority order:
#   1. an HTML comment, which may itself contain ">" and span several lines;
#   2. a tag whose quoted attribute values may contain ">";
#   3. the plain "<...>" pattern, kept as a fallback so malformed markup
#      (e.g. an unbalanced quote inside a tag) is still removed.
_HTML_TAG_RE = re.compile(
    r"""<!--.*?-->|<(?:"[^"]*"|'[^']*'|[^'">])+>|<[^>]+>""",
    re.DOTALL,
)


def strip_html_tags(text: str, decode_entities: bool = True) -> str:
    """Remove HTML tags and comments from *text* and return the plain text.

    Args:
        text: The HTML markup to clean.
        decode_entities: When true, HTML entities left in the remaining text
            (such as ``&amp;`` or ``&lt;``) are converted to the characters
            they stand for. When false, they are left exactly as written.

    Returns:
        The text with tags and comments removed and leading/trailing
        whitespace stripped.
    """
    clean = _HTML_TAG_RE.sub("", text)
    # Decode only after the tags are gone: an escaped "&lt;b&gt;" must end up
    # as the literal text "<b>", not be mistaken for markup and removed.
    if decode_entities:
        clean = html.unescape(clean)
    return clean.strip()
# Example usage
# "&" and "<Python>" are written as entities so they count as text, not markup;
# a raw "<Python>" would be removed as if it were a tag.
html_text = "<p>Hello, <b>World</b>! &amp; welcome to &lt;Python&gt;.</p>"
print(strip_html_tags(html_text))
# Hello, World! & welcome to <Python>.
print(strip_html_tags(html_text, decode_entities=False))
# Hello, World! &amp; welcome to &lt;Python&gt;.
Comments & Feedback
Comments are powered by Giscus. Sign in with GitHub to leave a comment.