Improve asset backfilling

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
2023-09-09 20:35:15 +03:00
parent c57eb7567c
commit 8a88d58192
9 changed files with 129 additions and 302 deletions
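
The commit removes the separate bars_filled table and folds gap-filling into the bars table itself: upsert, insert_or_skip, and upsert_batch on database::bars gain a backfill flag, and bars with missing OHLC values are merged with the most recent fully populated bar via Bar::merge_empty. Bar::merge_empty is defined elsewhere in the repository and does not appear in this diff; the snippet below is only a minimal sketch of the assumed behavior, using a hypothetical Bar reduced to Option-typed OHLC fields.

    // Hypothetical sketch: the real Bar lives in crate::types and carries more
    // fields (volume, num_trades, volume_weighted); only the merge idea is shown.
    struct Bar {
        open: Option<f64>,
        high: Option<f64>,
        low: Option<f64>,
        close: Option<f64>,
    }

    impl Bar {
        // Assumed behavior: fill any still-empty price field from the most
        // recent fully populated bar so gap bars carry known values forward.
        fn merge_empty(&mut self, filled: &Bar) {
            self.open = self.open.or(filled.open);
            self.high = self.high.or(filled.high);
            self.low = self.low.or(filled.low);
            self.close = self.close.or(filled.close);
        }
    }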

View File

@@ -1,22 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO bars_filled (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)\n ON CONFLICT (timestamp, asset_symbol) DO NOTHING",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Timestamptz",
"Text",
"Float8",
"Float8",
"Float8",
"Float8",
"Float8",
"Int8",
"Float8"
]
},
"nullable": []
},
"hash": "08656bb2e5424ab67014cc40c6cdcbf852e3e30364c3ea23269f29a331f807be"
}

View File

@@ -1,78 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO bars_filled (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)\n ON CONFLICT (timestamp, asset_symbol) DO UPDATE SET open = $3, high = $4, low = $5, close = $6, volume = $7, num_trades = $8, volume_weighted = $9\n RETURNING timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "timestamp",
"type_info": "Timestamptz"
},
{
"ordinal": 1,
"name": "asset_symbol",
"type_info": "Text"
},
{
"ordinal": 2,
"name": "open",
"type_info": "Float8"
},
{
"ordinal": 3,
"name": "high",
"type_info": "Float8"
},
{
"ordinal": 4,
"name": "low",
"type_info": "Float8"
},
{
"ordinal": 5,
"name": "close",
"type_info": "Float8"
},
{
"ordinal": 6,
"name": "volume",
"type_info": "Float8"
},
{
"ordinal": 7,
"name": "num_trades",
"type_info": "Int8"
},
{
"ordinal": 8,
"name": "volume_weighted",
"type_info": "Float8"
}
],
"parameters": {
"Left": [
"Timestamptz",
"Text",
"Float8",
"Float8",
"Float8",
"Float8",
"Float8",
"Int8",
"Float8"
]
},
"nullable": [
false,
false,
false,
false,
false,
false,
false,
false,
false
]
},
"hash": "26f7ea563429e395d270cfae4993059aa8ddab93e52c95761de2de022af91ad8"
}

View File

@@ -1,6 +1,6 @@
 {
   "db_name": "PostgreSQL",
-  "query": "SELECT * FROM bars_filled WHERE timestamp < $1 AND asset_symbol = $2 ORDER BY timestamp DESC LIMIT 1",
+  "query": "SELECT * FROM bars WHERE asset_symbol = $1 AND timestamp < $2 AND open IS NOT NULL AND high IS NOT NULL AND low IS NOT NULL AND close IS NOT NULL ORDER BY timestamp DESC LIMIT 1",
   "describe": {
     "columns": [
       {
@@ -51,21 +51,21 @@
     ],
     "parameters": {
       "Left": [
-        "Timestamptz",
-        "Text"
+        "Text",
+        "Timestamptz"
       ]
     },
     "nullable": [
       false,
       false,
-      false,
-      false,
-      false,
-      false,
+      true,
+      true,
+      true,
+      true,
       false,
       false,
       false
     ]
   },
-  "hash": "615dcbdc8f624ee990566b21f61b495fb273194a8278435cf4ed84028a55dbc0"
+  "hash": "6d9509cd482fbc022bfd157af8e59a1a32f0fbd8802cfec980e05706fb697b58"
 }

View File

@@ -109,8 +109,7 @@ pub async fn backfill(
     let bars = bars.into_values().collect::<Vec<Bar>>();
 
     let transaction = app_config.postgres_pool.begin().await.unwrap();
-    database::bars::upsert_batch(&app_config.postgres_pool, &bars).await;
-    database::bars_filled::upsert_batch(&app_config.postgres_pool, &bars).await;
+    database::bars::upsert_batch(&app_config.postgres_pool, &bars, true).await;
     database::assets::update_timestamp_last_where_symbol(
         &app_config.postgres_pool,
         &asset.symbol,
@@ -137,7 +136,7 @@ async fn backfill_recent_nulls(
         from,
     )
     .await;
-    database::bars_filled::upsert_batch(&app_config.postgres_pool, &bars).await;
+    database::bars::upsert_batch(&app_config.postgres_pool, &bars, true).await;
     database::assets::update_timestamp_last_where_symbol(
         &app_config.postgres_pool,
         &asset.symbol,

View File

@@ -230,22 +230,21 @@ async fn handle_message(
         incoming::Message::Bars(bar_message) => {
             let bar = Bar::from(bar_message);
             info!("Incoming bar for {}: {}", bar.asset_symbol, bar.timestamp);
-
-            let transaction = app_config.postgres_pool.begin().await.unwrap();
-            database::bars::upsert(&app_config.postgres_pool, &bar).await;
-            if *backfilled.read().await.get(&bar.asset_symbol).unwrap() {
-                database::bars_filled::upsert(&app_config.postgres_pool, &bar).await;
-            }
-            transaction.commit().await.unwrap();
+            database::bars::upsert(
+                &app_config.postgres_pool,
+                &bar,
+                backfilled.read().await[&bar.asset_symbol],
+            )
+            .await;
         }
         incoming::Message::UpdatedBars(bar_message) => {
             let bar = Bar::from(bar_message);
             info!("Incoming bar for {}: {}", bar.asset_symbol, bar.timestamp);
 
             let transaction = app_config.postgres_pool.begin().await.unwrap();
-            database::bars::upsert(&app_config.postgres_pool, &bar).await;
-            if *backfilled.read().await.get(&bar.asset_symbol).unwrap() {
-                database::bars_filled::upsert(&app_config.postgres_pool, &bar).await;
+            let backfilled_asset_symbol = backfilled.read().await[&bar.asset_symbol];
+            database::bars::upsert(&app_config.postgres_pool, &bar, backfilled_asset_symbol).await;
+            if backfilled_asset_symbol {
                 database::assets::update_timestamp_last_where_symbol(
                     &app_config.postgres_pool,
                     &bar.asset_symbol,
@@ -287,21 +286,20 @@ async fn null_handler(app_config: Arc<Config>, backfilled: Arc<RwLock<HashMap<St
         let bar = Bar::empty(timestamp, asset_symbol);
 
         let transaction = app_config.postgres_pool.begin().await.unwrap();
-        database::bars::insert_or_skip(&app_config.postgres_pool, &bar).await;
-        if *backfilled.get(&bar.asset_symbol).unwrap() {
-            database::bars_filled::insert_or_skip(&app_config.postgres_pool, &bar).await;
-
-            if state == NullHandlerState::UpdatedBars {
-                database::assets::update_timestamp_last_where_symbol(
-                    &app_config.postgres_pool,
-                    &bar.asset_symbol,
-                    &bar.timestamp,
-                )
-                .await;
-            }
-        }
+        database::bars::insert_or_skip(
+            &app_config.postgres_pool,
+            &bar,
+            backfilled[&bar.asset_symbol],
+        )
+        .await;
+        if backfilled[&bar.asset_symbol] && state == NullHandlerState::Bars {
+            database::assets::update_timestamp_last_where_symbol(
+                &app_config.postgres_pool,
+                &bar.asset_symbol,
+                &bar.timestamp,
+            )
+            .await;
+        }
         transaction.commit().await.unwrap();
     }

View File

@@ -1,9 +1,53 @@
 use crate::types::Bar;
 use sqlx::{query_as, PgPool, Postgres};
-use std::convert::Into;
 use time::OffsetDateTime;
 
-pub async fn upsert(postgres_pool: &PgPool, bar: &Bar) -> Bar {
+pub async fn select_not_null_where_symbol_where_timestamp_smaller_than_order_by_timestamp_desc_limit_one(
+    postgres_pool: &PgPool,
+    symbol: &str,
+    timestamp: &OffsetDateTime,
+) -> Bar {
+    query_as!(
+        Bar,
+        r#"SELECT * FROM bars WHERE asset_symbol = $1 AND timestamp < $2 AND open IS NOT NULL AND high IS NOT NULL AND low IS NOT NULL AND close IS NOT NULL ORDER BY timestamp DESC LIMIT 1"#,
+        symbol,
+        timestamp
+    )
+    .fetch_one(postgres_pool)
+    .await
+    .unwrap()
+}
+
+pub async fn select_where_symbol_where_timestamp_larger_than(
+    postgres_pool: &PgPool,
+    symbol: &str,
+    timestamp: &OffsetDateTime,
+) -> Vec<Bar> {
+    query_as!(
+        Bar,
+        r#"SELECT * FROM bars WHERE asset_symbol = $1 AND timestamp > $2 ORDER BY timestamp ASC"#,
+        symbol,
+        timestamp
+    )
+    .fetch_all(postgres_pool)
+    .await
+    .unwrap()
+}
+
+pub async fn upsert(postgres_pool: &PgPool, bar: &Bar, backfill: bool) -> Bar {
+    let mut bar = bar.clone();
+    if backfill
+        && (bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none())
+    {
+        let filled_bar = select_not_null_where_symbol_where_timestamp_smaller_than_order_by_timestamp_desc_limit_one(
+            postgres_pool,
+            &bar.asset_symbol,
+            &bar.timestamp,
+        ).await;
+        bar.merge_empty(&filled_bar);
+    }
+
     query_as!(
         Bar,
         r#"INSERT INTO bars (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
@@ -16,7 +60,20 @@ pub async fn upsert(postgres_pool: &PgPool, bar: &Bar) -> Bar {
     .unwrap()
 }
 
-pub async fn insert_or_skip(postgres_pool: &PgPool, bar: &Bar) {
+pub async fn insert_or_skip(postgres_pool: &PgPool, bar: &Bar, backfill: bool) {
+    let mut bar = bar.clone();
+    if backfill
+        && (bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none())
+    {
+        let filled_bar = select_not_null_where_symbol_where_timestamp_smaller_than_order_by_timestamp_desc_limit_one(
+            postgres_pool,
+            &bar.asset_symbol,
+            &bar.timestamp,
+        ).await;
+        bar.merge_empty(&filled_bar);
+    }
+
     query_as!(
         Bar,
         r#"INSERT INTO bars (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
@@ -28,7 +85,27 @@ pub async fn insert_or_skip(postgres_pool: &PgPool, bar: &Bar) {
     .unwrap();
 }
 
-pub async fn upsert_batch(postgres_pool: &PgPool, bars: &[Bar]) -> Vec<Bar> {
+pub async fn upsert_batch(postgres_pool: &PgPool, bars: &[Bar], backfill: bool) -> Vec<Bar> {
+    let mut bars = bars.to_vec();
+    if bars.is_empty() {
+        return bars;
+    }
+
+    if backfill
+        && (bars[0].open.is_none()
+            || bars[0].high.is_none()
+            || bars[0].low.is_none()
+            || bars[0].close.is_none())
+    {
+        let filled_bar = select_not_null_where_symbol_where_timestamp_smaller_than_order_by_timestamp_desc_limit_one(
+            postgres_pool,
+            &bars[0].asset_symbol,
+            &bars[0].timestamp,
+        ).await;
+        bars[0].merge_empty(&filled_bar);
+    }
+
     let mut timestamp = Vec::with_capacity(bars.len());
     let mut asset_symbol = Vec::with_capacity(bars.len());
     let mut open = Vec::with_capacity(bars.len());
@@ -39,7 +116,18 @@ pub async fn upsert_batch(postgres_pool: &PgPool, bars: &[Bar]) -> Vec<Bar> {
     let mut num_trades = Vec::with_capacity(bars.len());
     let mut volume_weighted = Vec::with_capacity(bars.len());
 
-    for bar in bars {
+    let mut last_filled_bar = bars[0].clone();
+    for mut bar in bars {
+        if backfill {
+            if bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none()
+            {
+                bar.merge_empty(&last_filled_bar);
+            } else {
+                last_filled_bar = bar.clone();
+            }
+        }
+
         timestamp.push(bar.timestamp);
         asset_symbol.push(bar.asset_symbol.clone());
         open.push(bar.open);
@@ -71,19 +159,3 @@ pub async fn upsert_batch(postgres_pool: &PgPool, bars: &[Bar]) -> Vec<Bar> {
     .await
     .unwrap()
 }
-
-pub async fn select_where_symbol_where_timestamp_larger_than(
-    postgres_pool: &PgPool,
-    symbol: &str,
-    timestamp: &OffsetDateTime,
-) -> Vec<Bar> {
-    query_as!(
-        Bar,
-        r#"SELECT * FROM bars WHERE asset_symbol = $1 AND timestamp > $2 ORDER BY timestamp ASC"#,
-        symbol,
-        timestamp
-    )
-    .fetch_all(postgres_pool)
-    .await
-    .unwrap()
-}

View File

@@ -1,133 +0,0 @@
use crate::types::Bar;
use sqlx::{query_as, PgPool, Postgres};
use std::convert::Into;

pub async fn upsert(postgres_pool: &PgPool, bar: &Bar) -> Bar {
    let mut bar = bar.clone();
    if bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none() {
        let filled_bar = query_as!(
            Bar,
            r#"SELECT * FROM bars_filled WHERE timestamp < $1 AND asset_symbol = $2 ORDER BY timestamp DESC LIMIT 1"#,
            bar.timestamp,
            bar.asset_symbol
        )
        .fetch_one(postgres_pool)
        .await
        .unwrap();
        bar.merge_empty(&filled_bar);
    }

    query_as!(
        Bar,
        r#"INSERT INTO bars_filled (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
            ON CONFLICT (timestamp, asset_symbol) DO UPDATE SET open = $3, high = $4, low = $5, close = $6, volume = $7, num_trades = $8, volume_weighted = $9
            RETURNING timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted"#,
        bar.timestamp, bar.asset_symbol, bar.open, bar.high, bar.low, bar.close, bar.volume, bar.num_trades, bar.volume_weighted
    )
    .fetch_one(postgres_pool)
    .await
    .unwrap()
}

pub async fn insert_or_skip(postgres_pool: &PgPool, bar: &Bar) {
    let mut bar = bar.clone();
    if bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none() {
        let filled_bar = query_as!(
            Bar,
            r#"SELECT * FROM bars_filled WHERE timestamp < $1 AND asset_symbol = $2 ORDER BY timestamp DESC LIMIT 1"#,
            bar.timestamp,
            bar.asset_symbol
        )
        .fetch_one(postgres_pool)
        .await
        .unwrap();
        bar.merge_empty(&filled_bar);
    }

    query_as!(
        Bar,
        r#"INSERT INTO bars_filled (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
            ON CONFLICT (timestamp, asset_symbol) DO NOTHING"#,
        bar.timestamp, bar.asset_symbol, bar.open, bar.high, bar.low, bar.close, bar.volume, bar.num_trades, bar.volume_weighted
    )
    .execute(postgres_pool)
    .await
    .unwrap();
}

pub async fn upsert_batch(postgres_pool: &PgPool, bars: &[Bar]) -> Vec<Bar> {
    let mut bars = bars.to_vec();
    if bars.is_empty() {
        return bars;
    }

    if bars[0].open.is_none()
        || bars[0].high.is_none()
        || bars[0].low.is_none()
        || bars[0].close.is_none()
    {
        let filled_bar = &query_as!(
            Bar,
            r#"SELECT * FROM bars_filled WHERE timestamp < $1 AND asset_symbol = $2 ORDER BY timestamp DESC LIMIT 1"#,
            bars[0].timestamp,
            bars[0].asset_symbol
        )
        .fetch_one(postgres_pool)
        .await
        .unwrap();
        bars[0].merge_empty(filled_bar);
    }

    let mut timestamp = Vec::with_capacity(bars.len());
    let mut asset_symbol = Vec::with_capacity(bars.len());
    let mut open = Vec::with_capacity(bars.len());
    let mut high = Vec::with_capacity(bars.len());
    let mut low = Vec::with_capacity(bars.len());
    let mut close = Vec::with_capacity(bars.len());
    let mut volume = Vec::with_capacity(bars.len());
    let mut num_trades = Vec::with_capacity(bars.len());
    let mut volume_weighted = Vec::with_capacity(bars.len());

    let mut last_filled_bar = bars[0].clone();
    for mut bar in bars {
        if bar.open.is_none() || bar.high.is_none() || bar.low.is_none() || bar.close.is_none() {
            bar.merge_empty(&last_filled_bar);
        } else {
            last_filled_bar = bar.clone();
        }

        timestamp.push(bar.timestamp);
        asset_symbol.push(bar.asset_symbol.clone());
        open.push(bar.open);
        high.push(bar.high);
        low.push(bar.low);
        close.push(bar.close);
        volume.push(bar.volume);
        num_trades.push(bar.num_trades);
        volume_weighted.push(bar.volume_weighted);
    }

    // No type-safety here because of NULLABLE bulk insert
    query_as::<Postgres, Bar>(
        r#"INSERT INTO bars_filled (timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted)
            SELECT * FROM UNNEST($1::timestamptz[], $2::text[], $3::float8[], $4::float8[], $5::float8[], $6::float8[], $7::float8[], $8::int8[], $9::float8[])
            ON CONFLICT (timestamp, asset_symbol) DO UPDATE SET open = EXCLUDED.open, high = EXCLUDED.high, low = EXCLUDED.low, close = EXCLUDED.close, volume = EXCLUDED.volume, num_trades = EXCLUDED.num_trades, volume_weighted = EXCLUDED.volume_weighted
            RETURNING timestamp, asset_symbol, open, high, low, close, volume, num_trades, volume_weighted"#,
    )
    .bind(timestamp)
    .bind(asset_symbol)
    .bind(open)
    .bind(high)
    .bind(low)
    .bind(close)
    .bind(volume)
    .bind(num_trades)
    .bind(volume_weighted)
    .fetch_all(postgres_pool)
    .await
    .unwrap()
}

View File

@@ -1,3 +1,2 @@
 pub mod assets;
 pub mod bars;
-pub mod bars_filled;

View File

@@ -39,18 +39,10 @@ psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
     SELECT create_hypertable('bars', 'timestamp', 'asset_symbol', 15);
 
-    CREATE TABLE bars_filled (
-        timestamp TIMESTAMPTZ,
-        asset_symbol TEXT REFERENCES assets(symbol) ON DELETE CASCADE ON UPDATE CASCADE,
-        open DOUBLE PRECISION NOT NULL,
-        high DOUBLE PRECISION NOT NULL,
-        low DOUBLE PRECISION NOT NULL,
-        close DOUBLE PRECISION NOT NULL,
-        volume DOUBLE PRECISION NOT NULL,
-        num_trades BIGINT NOT NULL,
-        volume_weighted DOUBLE PRECISION NOT NULL,
-        PRIMARY KEY (asset_symbol, timestamp)
+    ALTER TABLE bars SET (
+        timescaledb.compress,
+        timescaledb.compress_segmentby = 'asset_symbol'
     );
 
-    SELECT create_hypertable('bars_filled', 'timestamp', 'asset_symbol', 15);
+    SELECT add_compression_policy('bars', INTERVAL '30 days');
 EOSQL