Fix whitespace bug: keep the character that follows an HTML entity unless it is a semicolon

This commit is contained in:
alex wennerberg
2026-02-14 13:29:52 -08:00
parent 8d57ec3524
commit 97a1194cb1
2 changed files with 6 additions and 5 deletions

4
Cargo.lock generated
View File

@@ -1,7 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "nanohtml2text"
version = "0.2.0"
version = "0.2.1"

View File

@@ -48,9 +48,9 @@ fn html_entities_to_text(s: &str) -> String {
if let Some(entity) = parse_html_entity(&part[..end]) {
out.push(entity);
// Advance past the entity and any following semicolon or whitespace
let next_char_len = part[end..].chars().next().map_or(0, |c| c.len_utf8());
let remaining = &part[end + next_char_len..];
// Advance past the entity and any following semicolon
let skip = if part[end..].starts_with(';') { 1 } else { 0 };
let remaining = &part[end + skip..];
out.push_str(remaining);
} else {
@@ -276,6 +276,7 @@ mod tests {
ignore_unknown_tag_whitespace: "<aa >hello</aa>" to "hello",
ignore_unknown_tag_attributes: "<aa x=\"1\">hello</aa>" to "hello",
invalid_html_entity_without_semicolon: "&hellip" to "",
entity_whitespace_preserved: "&amp test" to "& test",
}
}