diff --git a/src/main.rs b/src/main.rs index f7895d8..437f7fe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,17 +3,24 @@ fn main() { println!("Hello, world!"); } -fn write_space(s: &mut String) {} +fn parse_html_entity(ent_name: &str) {} -fn html2text(input: &str) -> String { - let in_len = input.len(); +fn write_space(s: &mut String) { + let b = s.as_bytes(); + if b.len() > 0 && b[b.len() - 1] != b' ' { + s.push(' '); + } +} + +fn html2text(html: &str) -> String { + let in_len = html.len(); let mut tag_start = 0; let mut in_ent = false; let mut bad_tag_stack_depth = 0; let mut should_output = true; let mut can_print_new_line = false; let mut out_buf = String::new(); - for (i, r) in input.chars().enumerate() { + for (i, r) in html.chars().enumerate() { if in_len > 0 && i == in_len - 1 { can_print_new_line = false } @@ -27,8 +34,35 @@ fn html2text(input: &str) -> String { continue; } else if r == '&' && should_output { let mut ent_name = String::new(); - in_ent = false; - // parse the entity name, max 10 chars + let mut is_ent = false; + let mut chars = 10; + for er in html[i + 1..].chars() { + if er == ';' { + is_ent = true; + break; + } else { + ent_name.push(er); + } + chars += 1; + if chars == 10 { + break; + } + } + if is_ent { + // parseHTMLentity TODO + } + } else if r == '<' { + // start of tag + tag_start = i + 1; + should_output = false; + continue; + } else if r == '>' { // end of tag + // TODO + } + + if should_output && bad_tag_stack_depth == 0 && !in_ent { + can_print_new_line = true; + out_buf.push(r); } } out_buf