diff --git a/Cargo.lock b/Cargo.lock index d4adc35..bfe05b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,7 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "nanohtml2text" -version = "0.2.0" +version = "0.2.1" diff --git a/src/lib.rs b/src/lib.rs index f44222c..f4490d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,9 +48,9 @@ fn html_entities_to_text(s: &str) -> String { if let Some(entity) = parse_html_entity(&part[..end]) { out.push(entity); - // Advance past the entity and any following semicolon or whitespace - let next_char_len = part[end..].chars().next().map_or(0, |c| c.len_utf8()); - let remaining = &part[end + next_char_len..]; + // Advance past the entity and any following semicolon + let skip = if part[end..].starts_with(';') { 1 } else { 0 }; + let remaining = &part[end + skip..]; out.push_str(remaining); } else { @@ -276,6 +276,7 @@ mod tests { ignore_unknown_tag_whitespace: "hello" to "hello", ignore_unknown_tag_attributes: "hello" to "hello", invalid_html_entity_without_semicolon: "&hellip" to "…", + entity_whitespace_preserved: "& test" to "& test", } }