23 lines
638 B
Rust
23 lines
638 B
Rust
use html_escape::decode_html_entities;
|
|
use regex::Regex;
|
|
|
|
pub fn normalize_news_content(content: Option<String>) -> Option<String> {
|
|
content.as_ref()?;
|
|
let content = content.unwrap();
|
|
|
|
let re_tags = Regex::new("<[^>]+>").unwrap();
|
|
let re_spaces = Regex::new("[\\u00A0\\s]+").unwrap();
|
|
|
|
let content = content.replace('\n', " ");
|
|
let content = re_tags.replace_all(&content, "");
|
|
let content = re_spaces.replace_all(&content, " ");
|
|
let content = decode_html_entities(&content);
|
|
let content = content.trim();
|
|
|
|
if content.is_empty() {
|
|
None
|
|
} else {
|
|
Some(content.to_string())
|
|
}
|
|
}
|