diff --git a/Cargo.lock b/Cargo.lock index 1bbb709..96d16ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 3 [[package]] name = "nanohtml2text" -version = "0.1.2" +version = "0.1.3" diff --git a/src/lib.rs b/src/lib.rs index 103a527..bc2db93 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,12 +121,26 @@ fn handle_tag(s: &str) -> (String, usize) { // messed up from one uppercase symbol becoming two lowercase // symbols or something like that let more = more.to_ascii_lowercase(); - let end = more - .find("').map(|i| end_tag + i + 1)) .unwrap_or_else(|| more.len()); - (href.unwrap_or_default(), tag.len() + 1 + end) + + let link = match (content, href) { + (Some(content_value), Some(href_value)) => { + let cleaned_content_value = html2text(content_value); + format!("{} ({})", cleaned_content_value, href_value) + } + (None, Some(href_value)) => href_value, + (Some(content_value), None) => content_value.to_string(), + (None, None) => "".to_string(), + }; + + (link, tag.len() + 1 + end) } "br" | "br/" | "li" | "/ol" | "/ul" => (String::from("\r\n"), tag.len() + 1), "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "/h1" | "/h2" | "/h3" | "/h4" | "/h5" @@ -225,25 +239,25 @@ mod tests { // links link: "click here" - to "click test", + to "click here (test)", links_ignore_attributes: "click here" - to "click test", + to "click here (test)", link_entities_in_url: "click here" - to "click ents/'x'", + to "click here (ents/'x')", link_javascript: "click here" - to "click ", + to "click here", link_ignore_content_tags: "click here or here" - to "click test", + to "click here or here (test)", link_absolute_url: "click news" - to "click http://bit.ly/2n4wXRs", + to "click news (http://bit.ly/2n4wXRs)", link_ignore_attributes_2: "yet, not yet" - to "/wiki/yet#English, /wiki/not_yet#English", + to "yet (/wiki/yet#English), not yet (/wiki/not_yet#English)", // inlines ignore_inline: "strong text"