use html_escape::decode_html_entities; use regex::Regex; pub fn normalize_news_content(content: Option) -> Option { content.as_ref()?; let content = content.unwrap(); let re_tags = Regex::new("<[^>]+>").unwrap(); let re_spaces = Regex::new("[\\u00A0\\s]+").unwrap(); let content = content.replace('\n', " "); let content = re_tags.replace_all(&content, ""); let content = re_spaces.replace_all(&content, " "); let content = decode_html_entities(&content); let content = content.trim(); if content.is_empty() { None } else { Some(content.to_string()) } }