Add news content normalization and storing
Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
This commit is contained in:
22
src/utils/news.rs
Normal file
22
src/utils/news.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
use html_escape::decode_html_entities;
|
||||
use regex::Regex;
|
||||
|
||||
pub fn normalize_news_content(content: Option<String>) -> Option<String> {
|
||||
content.as_ref()?;
|
||||
let content = content.unwrap();
|
||||
|
||||
let re_tags = Regex::new("<[^>]+>").unwrap();
|
||||
let re_spaces = Regex::new("[\\u00A0\\s]+").unwrap();
|
||||
|
||||
let content = content.replace('\n', " ");
|
||||
let content = re_tags.replace_all(&content, "");
|
||||
let content = re_spaces.replace_all(&content, " ");
|
||||
let content = decode_html_entities(&content);
|
||||
let content = content.trim();
|
||||
|
||||
if content.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(content.to_string())
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user