Add news data support

- Refactor everything in the process, oops

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
src/threads/data/backfill.rs (new file, 374 additions)
@@ -0,0 +1,374 @@
use super::{Guard, ThreadType};
use crate::{
    config::{Config, ALPACA_CRYPTO_DATA_URL, ALPACA_NEWS_DATA_URL, ALPACA_STOCK_DATA_URL},
    database,
    types::{
        alpaca::{api, Source},
        Asset, Bar, Class, News, Subset,
    },
    utils::{duration_until, last_minute, FIFTEEN_MINUTES, ONE_MINUTE},
};
use backoff::{future::retry, ExponentialBackoff};
use log::{error, info};
use std::{collections::HashMap, sync::Arc};
use time::OffsetDateTime;
use tokio::{
    join, spawn,
    sync::{mpsc, oneshot, Mutex, RwLock},
    task::JoinHandle,
    time::sleep,
};

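/// Commands a backfill thread accepts: fetch any missing historical data for
/// a set of assets, or purge stored data for assets that are no longer
/// tracked.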
pub enum Action {
    Backfill,
    Purge,
}

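/// A backfill request, paired with a oneshot sender that is signalled once
/// the request has been fully processed.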
pub struct Message {
    pub action: Action,
    pub assets: Subset<Asset>,
    pub response: oneshot::Sender<()>,
}

impl Message {
    pub fn new(action: Action, assets: Subset<Asset>) -> (Self, oneshot::Receiver<()>) {
        let (sender, receiver) = oneshot::channel::<()>();
        (
            Self {
                action,
                assets,
                response: sender,
            },
            receiver,
        )
    }
}

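// A sketch of the sending side (illustrative names, not part of this file;
// the real senders own an `mpsc::Sender<Message>` for this thread):
//
//     let (message, receiver) = Message::new(Action::Backfill, Subset::All);
//     backfill_sender.send(message).await.unwrap();
//     receiver.await.unwrap(); // resolves once the message has been handled
//
/// Main loop for one backfill thread: resolves the Alpaca data endpoint for
/// the thread's type, then spawns a handler task per incoming message.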
pub async fn run(
    app_config: Arc<Config>,
    thread_type: ThreadType,
    guard: Arc<RwLock<Guard>>,
    mut backfill_receiver: mpsc::Receiver<Message>,
) {
    let backfill_jobs = Arc::new(Mutex::new(HashMap::new()));

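    // Each thread type targets a different Alpaca data endpoint: separate
    // base URLs for stock bars, crypto bars, and news.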
    let data_url = match thread_type {
        ThreadType::Bars(Class::UsEquity) => ALPACA_STOCK_DATA_URL.to_string(),
        ThreadType::Bars(Class::Crypto) => ALPACA_CRYPTO_DATA_URL.to_string(),
        ThreadType::News => ALPACA_NEWS_DATA_URL.to_string(),
    };

    loop {
        let app_config = app_config.clone();
        let guard = guard.clone();
        let backfill_jobs = backfill_jobs.clone();
        let data_url = data_url.clone();

        let message = backfill_receiver.recv().await.unwrap();

        spawn(handle_backfill_message(
            app_config,
            thread_type,
            guard,
            data_url,
            backfill_jobs,
            message,
        ));
    }
}

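/// Handles a single [`Message`]: resolves the affected symbols, then either
/// (re)spawns one backfill job per symbol or aborts running jobs and deletes
/// the stored rows.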
#[allow(clippy::significant_drop_tightening)]
#[allow(clippy::too_many_lines)]
async fn handle_backfill_message(
    app_config: Arc<Config>,
    thread_type: ThreadType,
    guard: Arc<RwLock<Guard>>,
    data_url: String,
    backfill_jobs: Arc<Mutex<HashMap<String, JoinHandle<()>>>>,
    message: Message,
) {
    let guard = guard.read().await;
    let mut backfill_jobs = backfill_jobs.lock().await;

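    // Map the requested assets to the key this thread tracks (ticker symbol
    // for bars, abbreviation for news), keeping only symbols the thread is
    // currently responsible for (Backfill) or no longer responsible for
    // (Purge).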
    let symbols = match message.assets {
        Subset::All => guard.symbols.clone().into_iter().collect::<Vec<_>>(),
        Subset::Some(assets) => assets
            .into_iter()
            .map(|asset| match thread_type {
                ThreadType::Bars(_) => asset.symbol,
                ThreadType::News => asset.abbreviation,
            })
            .filter(|symbol| match message.action {
                Action::Backfill => guard.symbols.contains(symbol),
                Action::Purge => !guard.symbols.contains(symbol),
            })
            .collect::<Vec<_>>(),
    };

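    // A new backfill request supersedes any job already running for the same
    // symbol: the old task is aborted before a replacement is spawned.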
    match message.action {
        Action::Backfill => {
            for symbol in symbols {
                if let Some(job) = backfill_jobs.remove(&symbol) {
                    if !job.is_finished() {
                        job.abort();
                    }
                    // The job may have been aborted or may have finished on
                    // its own, so the join result can be Ok or Err; ignore it
                    // either way rather than asserting on it.
                    let _ = job.await;
                }

                let app_config = app_config.clone();
                let data_url = data_url.clone();

                backfill_jobs.insert(
                    symbol.clone(),
                    spawn(async move {
                        let (fetch_from, fetch_to) =
                            queue_backfill(&app_config, thread_type, &symbol).await;

                        match thread_type {
                            ThreadType::Bars(_) => {
                                execute_backfill_bars(
                                    app_config,
                                    thread_type,
                                    data_url,
                                    symbol,
                                    fetch_from,
                                    fetch_to,
                                )
                                .await;
                            }
                            ThreadType::News => {
                                execute_backfill_news(
                                    app_config,
                                    thread_type,
                                    data_url,
                                    symbol,
                                    fetch_from,
                                    fetch_to,
                                )
                                .await;
                            }
                        }
                    }),
                );
            }
        }
        Action::Purge => {
            for symbol in &symbols {
                if let Some(job) = backfill_jobs.remove(symbol) {
                    if !job.is_finished() {
                        job.abort();
                    }
                    let _ = job.await;
                }
            }

            let backfills_future = database::backfills::delete_where_symbols(
                &app_config.clickhouse_client,
                &thread_type,
                &symbols,
            );

            let data_future = async {
                match thread_type {
                    ThreadType::Bars(_) => {
                        database::bars::delete_where_symbols(
                            &app_config.clickhouse_client,
                            &symbols,
                        )
                        .await;
                    }
                    ThreadType::News => {
                        database::news::delete_where_symbols(
                            &app_config.clickhouse_client,
                            &symbols,
                        )
                        .await;
                    }
                }
            };

            // Delete the backfill checkpoints and the stored data
            // concurrently.
            join!(backfills_future, data_future);
        }
    }

    message.response.send(()).unwrap();
}

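/// Computes the fetch window for `symbol`: from one minute after the latest
/// recorded checkpoint (or the Unix epoch on a first run) up to the last full
/// minute. With the delayed IEX feed, also sleeps until that window is old
/// enough to be queryable.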
async fn queue_backfill(
    app_config: &Arc<Config>,
    thread_type: ThreadType,
    symbol: &String,
) -> (OffsetDateTime, OffsetDateTime) {
    let latest_backfill = database::backfills::select_latest_where_symbol(
        &app_config.clickhouse_client,
        &thread_type,
        &symbol,
    )
    .await;

    let fetch_from = latest_backfill
        .as_ref()
        .map_or(OffsetDateTime::UNIX_EPOCH, |backfill| {
            backfill.time + ONE_MINUTE
        });

    let fetch_to = last_minute();

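    // The IEX feed is delayed, so wait until `fetch_to` is at least fifteen
    // minutes (plus a minute of slack) in the past before fetching.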
    if app_config.alpaca_source == Source::Iex {
        let run_delay = duration_until(fetch_to + FIFTEEN_MINUTES + ONE_MINUTE);
        info!(
            "{:?} - Queuing backfill for {} in {:?}.",
            thread_type, symbol, run_delay
        );
        sleep(run_delay).await;
    }

    (fetch_from, fetch_to)
}

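/// Pages through Alpaca's historical bars for `symbol` over
/// `[fetch_from, fetch_to]`, then upserts the bars and records the last one
/// as the new backfill checkpoint.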
async fn execute_backfill_bars(
    app_config: Arc<Config>,
    thread_type: ThreadType,
    data_url: String,
    symbol: String,
    fetch_from: OffsetDateTime,
    fetch_to: OffsetDateTime,
) {
    if fetch_from > fetch_to {
        return;
    }

    info!("{:?} - Backfilling data for {}.", thread_type, symbol);

    let mut bars = Vec::new();
    let mut next_page_token = None;

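    // Page through the endpoint. Request and HTTP-status errors are retried
    // with exponential backoff (`?` converts them into transient errors),
    // while a response body that fails to decode is treated as permanent.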
    loop {
        let message = retry(ExponentialBackoff::default(), || async {
            app_config.alpaca_rate_limit.until_ready().await;
            app_config
                .alpaca_client
                .get(&data_url)
                .query(&api::outgoing::bar::Bar::new(
                    vec![symbol.clone()],
                    ONE_MINUTE,
                    fetch_from,
                    fetch_to,
                    10000,
                    next_page_token.clone(),
                ))
                .send()
                .await?
                .error_for_status()?
                .json::<api::incoming::bar::Message>()
                .await
                .map_err(backoff::Error::Permanent)
        })
        .await;

        let message = match message {
            Ok(message) => message,
            Err(e) => {
                error!(
                    "{:?} - Failed to backfill data for {}: {}.",
                    thread_type, symbol, e
                );
                return;
            }
        };

        message.bars.into_iter().for_each(|(symbol, bar_vec)| {
            for bar in bar_vec {
                bars.push(Bar::from((bar, symbol.clone())));
            }
        });

        if message.next_page_token.is_none() {
            break;
        }
        next_page_token = message.next_page_token;
    }

    if bars.is_empty() {
        return;
    }

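    // The data is written before the checkpoint, so a crash between the two
    // upserts can at worst cause already-stored bars to be re-fetched, never
    // a skipped window.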
    let backfill = bars.last().unwrap().clone().into();
    database::bars::upsert_batch(&app_config.clickhouse_client, bars).await;
    database::backfills::upsert(&app_config.clickhouse_client, &thread_type, &backfill).await;

    info!("{:?} - Backfilled data for {}.", thread_type, symbol);
}

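/// Pages through Alpaca's historical news for `symbol` over
/// `[fetch_from, fetch_to]`, then upserts the items and records the last one
/// (paired with this thread's symbol) as the new backfill checkpoint.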
async fn execute_backfill_news(
    app_config: Arc<Config>,
    thread_type: ThreadType,
    data_url: String,
    symbol: String,
    fetch_from: OffsetDateTime,
    fetch_to: OffsetDateTime,
) {
    if fetch_from > fetch_to {
        return;
    }

    info!("{:?} - Backfilling data for {}.", thread_type, symbol);

    let mut news = Vec::new();
    let mut next_page_token = None;

    loop {
        let message = retry(ExponentialBackoff::default(), || async {
            app_config.alpaca_rate_limit.until_ready().await;
            app_config
                .alpaca_client
                .get(&data_url)
                .query(&api::outgoing::news::News::new(
                    vec![symbol.clone()],
                    fetch_from,
                    fetch_to,
                    50,
                    true,
                    false,
                    next_page_token.clone(),
                ))
                .send()
                .await?
                .error_for_status()?
                .json::<api::incoming::news::Message>()
                .await
                .map_err(backoff::Error::Permanent)
        })
        .await;

        let message = match message {
            Ok(message) => message,
            Err(e) => {
                error!(
                    "{:?} - Failed to backfill data for {}: {}.",
                    thread_type, symbol, e
                );
                return;
            }
        };

        message.news.into_iter().for_each(|news_item| {
            news.push(News::from(news_item));
        });

        if message.next_page_token.is_none() {
            break;
        }
        next_page_token = message.next_page_token;
    }

    if news.is_empty() {
        return;
    }

    let backfill = (news.last().unwrap().clone(), symbol.clone()).into();
    database::news::upsert_batch(&app_config.clickhouse_client, news).await;
    database::backfills::upsert(&app_config.clickhouse_client, &thread_type, &backfill).await;

    info!("{:?} - Backfilled data for {}.", thread_type, symbol);
}