Fix backfill sentiment batching bug

Signed-off-by: Nikolaos Karaolidis <nick@karaolidis.com>
This commit is contained in:
2024-03-12 21:00:11 +00:00
parent d2d20e2978
commit 5ed0c7670a
7 changed files with 141 additions and 110 deletions

View File

@@ -13,7 +13,7 @@ use rust_bert::{
resources::LocalResource,
};
use std::{env, num::NonZeroU32, path::PathBuf, sync::Arc};
use tokio::sync::{Mutex, Semaphore};
use tokio::sync::Semaphore;
lazy_static! {
pub static ref ALPACA_MODE: Mode = env::var("ALPACA_MODE")
@@ -32,6 +32,14 @@ lazy_static! {
env::var("ALPACA_API_KEY").expect("ALPACA_API_KEY must be set.");
pub static ref ALPACA_API_SECRET: String =
env::var("ALPACA_API_SECRET").expect("ALPACA_API_SECRET must be set.");
/// Value of the `BATCH_BACKFILL_BARS_SIZE` environment variable, parsed as a `usize`.
///
/// Presumably the batch size used when backfilling bars; the consumers are not
/// visible here, so confirm against the call sites.
///
/// # Panics
///
/// Panics when this static is initialised if the variable cannot be read
/// (unset, or not valid Unicode) or if its value does not parse as a `usize`.
///
/// NOTE(review): `usize` parsing accepts `0`, so the "positive integer" wording
/// in the panic message is not enforced here — confirm that consumers tolerate
/// a zero batch size.
pub static ref BATCH_BACKFILL_BARS_SIZE: usize = env::var("BATCH_BACKFILL_BARS_SIZE")
.expect("BATCH_BACKFILL_BARS_SIZE must be set.")
.parse()
.expect("BATCH_BACKFILL_BARS_SIZE must be a positive integer.");
/// Value of the `BATCH_BACKFILL_NEWS_SIZE` environment variable, parsed as a `usize`.
///
/// Presumably the batch size used when backfilling news; the consumers are not
/// visible here, so confirm against the call sites.
///
/// # Panics
///
/// Panics when this static is initialised if the variable cannot be read
/// (unset, or not valid Unicode) or if its value does not parse as a `usize`.
///
/// NOTE(review): `usize` parsing accepts `0`, so the "positive integer" wording
/// in the panic message is not enforced here — confirm that consumers tolerate
/// a zero batch size.
pub static ref BATCH_BACKFILL_NEWS_SIZE: usize = env::var("BATCH_BACKFILL_NEWS_SIZE")
.expect("BATCH_BACKFILL_NEWS_SIZE must be set.")
.parse()
.expect("BATCH_BACKFILL_NEWS_SIZE must be a positive integer.");
pub static ref BERT_MAX_INPUTS: usize = env::var("BERT_MAX_INPUTS")
.expect("BERT_MAX_INPUTS must be set.")
.parse()
@@ -47,7 +55,7 @@ pub struct Config {
pub alpaca_rate_limiter: DefaultDirectRateLimiter,
pub clickhouse_client: clickhouse::Client,
pub clickhouse_concurrency_limiter: Arc<Semaphore>,
pub sequence_classifier: Mutex<SequenceClassificationModel>,
pub sequence_classifier: std::sync::Mutex<SequenceClassificationModel>,
}
impl Config {
@@ -81,7 +89,7 @@ impl Config {
)
.with_database(env::var("CLICKHOUSE_DB").expect("CLICKHOUSE_DB must be set.")),
clickhouse_concurrency_limiter: Arc::new(Semaphore::new(*CLICKHOUSE_MAX_CONNECTIONS)),
sequence_classifier: Mutex::new(
sequence_classifier: std::sync::Mutex::new(
SequenceClassificationModel::new(SequenceClassificationConfig::new(
ModelType::Bert,
ModelResource::Torch(Box::new(LocalResource {