Add news data support

- Refactor everything in the process, oops

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
2024-01-25 10:46:42 +00:00
parent 178a062c25
commit 002f70e299
53 changed files with 1683 additions and 677 deletions


@@ -0,0 +1,374 @@
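//! Backfill worker: pages historical bar and news data out of the Alpaca data
//! API, persists it to ClickHouse, and tracks per-symbol progress so each run
//! only fetches the range that is still missing.
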
use super::{Guard, ThreadType};
use crate::{
config::{Config, ALPACA_CRYPTO_DATA_URL, ALPACA_NEWS_DATA_URL, ALPACA_STOCK_DATA_URL},
database,
types::{
alpaca::{api, Source},
Asset, Bar, Class, News, Subset,
},
utils::{duration_until, last_minute, FIFTEEN_MINUTES, ONE_MINUTE},
};
use backoff::{future::retry, ExponentialBackoff};
use log::{error, info};
use std::{collections::HashMap, sync::Arc};
use time::OffsetDateTime;
use tokio::{
join, spawn,
sync::{mpsc, oneshot, Mutex, RwLock},
task::JoinHandle,
time::sleep,
};
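
/// A command for a backfill worker: `Backfill` fetches missing historical
/// data for the given assets, while `Purge` removes previously stored data.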
pub enum Action {
Backfill,
Purge,
}
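
/// A request to the backfill thread, carrying a oneshot sender used to signal
/// completion back to the caller.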
pub struct Message {
pub action: Action,
pub assets: Subset<Asset>,
pub response: oneshot::Sender<()>,
}
impl Message {
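    /// Build a message together with the receiver the caller can await until
    /// the request has been fully handled.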
pub fn new(action: Action, assets: Subset<Asset>) -> (Self, oneshot::Receiver<()>) {
let (sender, receiver) = oneshot::channel::<()>();
(
Self {
action,
assets,
response: sender,
},
receiver,
)
}
}
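
/// Worker entry point: selects the data URL for this thread's feed, then
/// spawns a handler task for every message received on the channel.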
pub async fn run(
app_config: Arc<Config>,
thread_type: ThreadType,
guard: Arc<RwLock<Guard>>,
mut backfill_receiver: mpsc::Receiver<Message>,
) {
let backfill_jobs = Arc::new(Mutex::new(HashMap::new()));
let data_url = match thread_type {
ThreadType::Bars(Class::UsEquity) => ALPACA_STOCK_DATA_URL.to_string(),
ThreadType::Bars(Class::Crypto) => ALPACA_CRYPTO_DATA_URL.to_string(),
ThreadType::News => ALPACA_NEWS_DATA_URL.to_string(),
};
loop {
let app_config = app_config.clone();
let guard = guard.clone();
let backfill_jobs = backfill_jobs.clone();
let data_url = data_url.clone();
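        // `recv` returns `None` only once every sender has been dropped; the
        // `unwrap` turns that unexpected shutdown into a hard failure.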
let message = backfill_receiver.recv().await.unwrap();
spawn(handle_backfill_message(
app_config,
thread_type,
guard,
data_url,
backfill_jobs,
message,
));
}
}
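
/// Handle one message: resolve the affected symbols for this thread type,
/// then either (re)spawn a backfill job per symbol, or abort running jobs and
/// purge their stored data.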
#[allow(clippy::significant_drop_tightening)]
#[allow(clippy::too_many_lines)]
async fn handle_backfill_message(
app_config: Arc<Config>,
thread_type: ThreadType,
guard: Arc<RwLock<Guard>>,
data_url: String,
backfill_jobs: Arc<Mutex<HashMap<String, JoinHandle<()>>>>,
message: Message,
) {
let guard = guard.read().await;
let mut backfill_jobs = backfill_jobs.lock().await;
let symbols = match message.assets {
Subset::All => guard.symbols.clone().into_iter().collect::<Vec<_>>(),
Subset::Some(assets) => assets
.into_iter()
.map(|asset| match thread_type {
ThreadType::Bars(_) => asset.symbol,
ThreadType::News => asset.abbreviation,
})
.filter(|symbol| match message.action {
Action::Backfill => guard.symbols.contains(symbol),
Action::Purge => !guard.symbols.contains(symbol),
})
.collect::<Vec<_>>(),
};
match message.action {
Action::Backfill => {
for symbol in symbols {
if let Some(job) = backfill_jobs.remove(&symbol) {
if !job.is_finished() {
job.abort();
}
                    // Awaiting an already-finished job yields Ok(()), so
                    // unwrap_err() would panic here; just make sure the old
                    // task has fully stopped before rescheduling.
                    let _ = job.await;
}
let app_config = app_config.clone();
let data_url = data_url.clone();
backfill_jobs.insert(
symbol.clone(),
spawn(async move {
let (fetch_from, fetch_to) =
queue_backfill(&app_config, thread_type, &symbol).await;
match thread_type {
ThreadType::Bars(_) => {
execute_backfill_bars(
app_config,
thread_type,
data_url,
symbol,
fetch_from,
fetch_to,
)
.await;
}
ThreadType::News => {
execute_backfill_news(
app_config,
thread_type,
data_url,
symbol,
fetch_from,
fetch_to,
)
.await;
}
}
}),
);
}
}
Action::Purge => {
for symbol in &symbols {
if let Some(job) = backfill_jobs.remove(symbol) {
if !job.is_finished() {
job.abort();
}
                    // As above: unwrap_err() panics on a finished job, so just
                    // wait for the task to be fully gone.
                    let _ = job.await;
}
}
let backfills_future = database::backfills::delete_where_symbols(
&app_config.clickhouse_client,
&thread_type,
&symbols,
);
let data_future = async {
match thread_type {
ThreadType::Bars(_) => {
database::bars::delete_where_symbols(
&app_config.clickhouse_client,
&symbols,
)
.await;
}
ThreadType::News => {
database::news::delete_where_symbols(
&app_config.clickhouse_client,
&symbols,
)
.await;
}
}
};
join!(backfills_future, data_future);
}
}
    // A dropped receiver just means the caller stopped waiting; a failed send
    // is harmless here.
    let _ = message.response.send(());
}
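
/// Work out the time range still missing for `symbol`: resume one minute
/// after the last recorded backfill, or from the Unix epoch on the first run.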
async fn queue_backfill(
app_config: &Arc<Config>,
thread_type: ThreadType,
    symbol: &str,
) -> (OffsetDateTime, OffsetDateTime) {
let latest_backfill = database::backfills::select_latest_where_symbol(
&app_config.clickhouse_client,
&thread_type,
        symbol,
)
.await;
let fetch_from = latest_backfill
.as_ref()
.map_or(OffsetDateTime::UNIX_EPOCH, |backfill| {
backfill.time + ONE_MINUTE
});
let fetch_to = last_minute();
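    // The free IEX feed does not allow querying the most recent fifteen
    // minutes of historical data, so wait until `fetch_to` is old enough.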
if app_config.alpaca_source == Source::Iex {
let run_delay = duration_until(fetch_to + FIFTEEN_MINUTES + ONE_MINUTE);
info!(
"{:?} - Queing backfill for {} in {:?}.",
thread_type, symbol, run_delay
);
sleep(run_delay).await;
}
(fetch_from, fetch_to)
}
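
/// Page through Alpaca's historical bars endpoint for `symbol`, upsert the
/// results, and record the last bar as the new backfill watermark.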
async fn execute_backfill_bars(
app_config: Arc<Config>,
thread_type: ThreadType,
data_url: String,
symbol: String,
fetch_from: OffsetDateTime,
fetch_to: OffsetDateTime,
) {
if fetch_from > fetch_to {
return;
}
info!("{:?} - Backfilling data for {}.", thread_type, symbol);
let mut bars = Vec::new();
let mut next_page_token = None;
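    // Fetch pages of up to 10,000 bars until no next_page_token is returned,
    // retrying transient request errors with exponential backoff.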
loop {
let message = retry(ExponentialBackoff::default(), || async {
app_config.alpaca_rate_limit.until_ready().await;
app_config
.alpaca_client
.get(&data_url)
.query(&api::outgoing::bar::Bar::new(
vec![symbol.clone()],
ONE_MINUTE,
fetch_from,
fetch_to,
10000,
next_page_token.clone(),
))
.send()
.await?
.error_for_status()?
.json::<api::incoming::bar::Message>()
.await
.map_err(backoff::Error::Permanent)
})
.await;
let message = match message {
Ok(message) => message,
Err(e) => {
error!(
"{:?} - Failed to backfill data for {}: {}.",
thread_type, symbol, e
);
return;
}
};
        for (symbol, bar_vec) in message.bars {
            bars.extend(bar_vec.into_iter().map(|bar| Bar::from((bar, symbol.clone()))));
        }
if message.next_page_token.is_none() {
break;
}
next_page_token = message.next_page_token;
}
if bars.is_empty() {
return;
}
let backfill = bars.last().unwrap().clone().into();
database::bars::upsert_batch(&app_config.clickhouse_client, bars).await;
database::backfills::upsert(&app_config.clickhouse_client, &thread_type, &backfill).await;
info!("{:?} - Backfilled data for {}.", thread_type, symbol);
}
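
/// News counterpart of `execute_backfill_bars`: page through all articles for
/// `symbol` and record the newest one as the backfill watermark.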
async fn execute_backfill_news(
app_config: Arc<Config>,
thread_type: ThreadType,
data_url: String,
symbol: String,
fetch_from: OffsetDateTime,
fetch_to: OffsetDateTime,
) {
if fetch_from > fetch_to {
return;
}
info!("{:?} - Backfilling data for {}.", thread_type, symbol);
let mut news = Vec::new();
let mut next_page_token = None;
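    // Fetch pages of 50 articles until no next_page_token is returned,
    // retrying transient request errors with exponential backoff.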
loop {
let message = retry(ExponentialBackoff::default(), || async {
app_config.alpaca_rate_limit.until_ready().await;
app_config
.alpaca_client
.get(&data_url)
.query(&api::outgoing::news::News::new(
vec![symbol.clone()],
fetch_from,
fetch_to,
50,
true,
false,
next_page_token.clone(),
))
.send()
.await?
.error_for_status()?
.json::<api::incoming::news::Message>()
.await
.map_err(backoff::Error::Permanent)
})
.await;
let message = match message {
Ok(message) => message,
Err(e) => {
error!(
"{:?} - Failed to backfill data for {}: {}.",
thread_type, symbol, e
);
return;
}
};
        news.extend(message.news.into_iter().map(News::from));
if message.next_page_token.is_none() {
break;
}
next_page_token = message.next_page_token;
}
if news.is_empty() {
return;
}
let backfill = (news.last().unwrap().clone(), symbol.clone()).into();
database::news::upsert_batch(&app_config.clickhouse_client, news).await;
database::backfills::upsert(&app_config.clickhouse_client, &thread_type, &backfill).await;
info!("{:?} - Backfilled data for {}.", thread_type, symbol);
}