make worker a struct for readability

This commit is contained in:
phiresky 2023-08-03 12:58:46 +00:00
parent 7f82bd07fe
commit 7e72ad87fe
2 changed files with 212 additions and 146 deletions

View File

@ -1,11 +1,12 @@
use crate::{ use crate::{
util::{retry_sleep_duration, CancellableTask}, util::{retry_sleep_duration, CancellableTask},
worker::instance_worker, worker::InstanceWorker,
}; };
use activitypub_federation::config::FederationConfig; use activitypub_federation::config::FederationConfig;
use chrono::{Local, Timelike}; use chrono::{Local, Timelike};
use clap::Parser; use clap::Parser;
use federation_queue_state::FederationQueueState; use federation_queue_state::FederationQueueState;
use lemmy_api_common::context::LemmyContext;
use lemmy_db_schema::{ use lemmy_db_schema::{
source::instance::Instance, source::instance::Instance,
utils::{ActualDbPool, DbPool}, utils::{ActualDbPool, DbPool},
@ -36,10 +37,10 @@ pub struct Opts {
pub process_index: i32, pub process_index: i32,
} }
async fn start_stop_federation_workers<T: Clone + Send + Sync + 'static>( async fn start_stop_federation_workers(
opts: Opts, opts: Opts,
pool: ActualDbPool, pool: ActualDbPool,
federation_config: FederationConfig<T>, federation_config: FederationConfig<LemmyContext>,
cancel: CancellationToken, cancel: CancellationToken,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut workers = HashMap::new(); let mut workers = HashMap::new();
@ -68,16 +69,20 @@ async fn start_stop_federation_workers<T: Clone + Send + Sync + 'static>(
let should_federate = allowed && !is_dead; let should_federate = allowed && !is_dead;
if !workers.contains_key(&instance.id) && should_federate { if !workers.contains_key(&instance.id) && should_federate {
let stats_sender = stats_sender.clone(); let stats_sender = stats_sender.clone();
let context = federation_config.to_request_data();
let pool = pool.clone();
workers.insert( workers.insert(
instance.id, instance.id,
CancellableTask::spawn(WORKER_EXIT_TIMEOUT, |stop| { CancellableTask::spawn(WORKER_EXIT_TIMEOUT, |stop| async move {
instance_worker( InstanceWorker::init_and_loop(
pool.clone(),
instance, instance,
federation_config.to_request_data(), context,
&mut DbPool::Pool(&pool),
stop, stop,
stats_sender, stats_sender,
) )
.await?;
Ok(())
}), }),
); );
} else if !should_federate { } else if !should_federate {
@ -112,7 +117,7 @@ async fn start_stop_federation_workers<T: Clone + Send + Sync + 'static>(
pub fn start_stop_federation_workers_cancellable( pub fn start_stop_federation_workers_cancellable(
opts: Opts, opts: Opts,
pool: ActualDbPool, pool: ActualDbPool,
config: FederationConfig<impl Clone + Send + Sync + 'static>, config: FederationConfig<LemmyContext>,
) -> CancellableTask<()> { ) -> CancellableTask<()> {
CancellableTask::spawn(WORKER_EXIT_TIMEOUT, move |c| { CancellableTask::spawn(WORKER_EXIT_TIMEOUT, move |c| {
start_stop_federation_workers(opts, pool, config, c) start_stop_federation_workers(opts, pool, config, c)

View File

@ -8,6 +8,8 @@ use activitypub_federation::{
}; };
use anyhow::Result; use anyhow::Result;
use chrono::{DateTime, TimeZone, Utc}; use chrono::{DateTime, TimeZone, Utc};
use lemmy_api_common::context::LemmyContext;
use lemmy_apub::activity_lists::SharedInboxActivities;
use lemmy_db_schema::{ use lemmy_db_schema::{
newtypes::{CommunityId, InstanceId}, newtypes::{CommunityId, InstanceId},
source::{activity::SentActivity, instance::Instance, site::Site}, source::{activity::SentActivity, instance::Instance, site::Site},
@ -26,174 +28,233 @@ use tokio_util::sync::CancellationToken;
static CHECK_SAVE_STATE_EVERY_IT: i64 = 100; static CHECK_SAVE_STATE_EVERY_IT: i64 = 100;
static SAVE_STATE_EVERY_TIME: Duration = Duration::from_secs(10); static SAVE_STATE_EVERY_TIME: Duration = Duration::from_secs(10);
/// loop fetch new activities from db and send them to the inboxes of the given instances pub(crate) struct InstanceWorker {
/// this worker only returns if (a) there is an internal error or (b) the cancellation token is cancelled (graceful exit)
pub async fn instance_worker(
pool: ActualDbPool,
instance: Instance, instance: Instance,
data: Data<impl Clone>, site: Option<Site>,
followed_communities: HashMap<CommunityId, HashSet<Url>>,
stop: CancellationToken, stop: CancellationToken,
context: Data<LemmyContext>,
stats_sender: UnboundedSender<FederationQueueState>, stats_sender: UnboundedSender<FederationQueueState>,
) -> Result<(), anyhow::Error> { last_full_communities_fetch: DateTime<Utc>,
let pool = &mut DbPool::Pool(&pool); last_incremental_communities_fetch: DateTime<Utc>,
let mut last_full_communities_fetch = Utc.timestamp_nanos(0); state: FederationQueueState,
let mut last_incremental_communities_fetch = Utc.timestamp_nanos(0); last_state_insert: DateTime<Utc>,
let mut last_state_insert = Utc.timestamp_nanos(0); }
let mut followed_communities: HashMap<CommunityId, HashSet<Url>> =
get_communities(pool, instance.id, &mut last_incremental_communities_fetch).await?;
let site = Site::read_from_instance_id(pool, instance.id).await?;
let mut state = FederationQueueState::load(pool, &instance.domain).await?; impl InstanceWorker {
if state.fail_count > 0 { pub(crate) async fn init_and_loop(
// before starting queue, sleep remaining duration instance: Instance,
let elapsed = (Utc::now() - state.last_retry).to_std()?; context: Data<LemmyContext>,
let remaining = retry_sleep_duration(state.fail_count) - elapsed; pool: &mut DbPool<'_>, // in theory there's a ref to the pool in context, but i couldn't get that to work wrt lifetimes
tokio::select! { stop: CancellationToken,
() = sleep(remaining) => {}, stats_sender: UnboundedSender<FederationQueueState>,
() = stop.cancelled() => { return Ok(()); } ) -> Result<(), anyhow::Error> {
} let site = Site::read_from_instance_id(pool, instance.id).await?;
let state = FederationQueueState::load(pool, &instance.domain).await?;
let mut worker = InstanceWorker {
instance,
site,
followed_communities: HashMap::new(),
stop,
context,
stats_sender,
last_full_communities_fetch: Utc.timestamp_nanos(0),
last_incremental_communities_fetch: Utc.timestamp_nanos(0),
state,
last_state_insert: Utc.timestamp_nanos(0),
};
worker.loop_until_stopped(pool).await
} }
while !stop.is_cancelled() { /// loop fetch new activities from db and send them to the inboxes of the given instances
/// this worker only returns if (a) there is an internal error or (b) the cancellation token is cancelled (graceful exit)
///
/// # Errors
/// Returns the first error raised while refreshing the followed communities, waiting out the
/// initial back-off, processing a batch, or persisting / publishing the queue state.
pub(crate) async fn loop_until_stopped(
    &mut self,
    pool: &mut DbPool<'_>,
) -> Result<(), anyhow::Error> {
    // Both helpers return a `Result`; propagate failures instead of silently discarding them,
    // otherwise the worker would keep running with a stale or empty community map.
    self.update_communities(pool).await?;
    self.initial_fail_sleep().await?;
    while !self.stop.is_cancelled() {
        self.loop_batch(pool).await?;
        if self.stop.is_cancelled() {
            // skip the periodic bookkeeping; the final upsert below persists the state
            break;
        }
        if Utc::now() - self.last_state_insert
            > chrono::Duration::from_std(SAVE_STATE_EVERY_TIME).expect("not negative")
        {
            self.last_state_insert = Utc::now();
            FederationQueueState::upsert(pool, &self.state).await?;
            self.stats_sender.send(self.state.clone())?;
        }
        self.update_communities(pool).await?;
    }
    // final update of state in db
    FederationQueueState::upsert(pool, &self.state).await?;
    Ok(())
}
/// Before starting the queue, sleep for whatever is left of the retry back-off if the last
/// request failed (`fail_count > 0`).
///
/// Returns immediately when there were no failures or when the back-off window has already
/// fully elapsed. The sleep is raced against `self.stop`, so a cancellation wakes the worker
/// early; the caller is expected to check the token afterwards.
///
/// # Errors
/// Fails if the time since `last_retry` cannot be converted to a `std::time::Duration`
/// (e.g. `last_retry` lies in the future).
async fn initial_fail_sleep(&mut self) -> Result<()> {
    if self.state.fail_count > 0 {
        let elapsed = (Utc::now() - self.state.last_retry).to_std()?;
        let required = retry_sleep_duration(self.state.fail_count);
        // `Duration - Duration` panics on underflow; if more time than required has already
        // passed there is nothing left to wait for.
        let Some(remaining) = required.checked_sub(elapsed) else {
            return Ok(());
        };
        tokio::select! {
            () = sleep(remaining) => {},
            () = self.stop.cancelled() => {}
        }
    }
    Ok(())
}
async fn loop_batch(&mut self, pool: &mut DbPool<'_>) -> Result<()> {
let latest_id = get_latest_activity_id(pool).await?; let latest_id = get_latest_activity_id(pool).await?;
let mut id = state.last_successful_id; let mut id = self.state.last_successful_id;
if id == latest_id { if id == latest_id {
// no more work to be done, wait before rechecking // no more work to be done, wait before rechecking
tokio::select! { tokio::select! {
() = sleep(Duration::from_secs(10)) => { continue; }, () = sleep(Duration::from_secs(10)) => {},
() = stop.cancelled() => { return Ok(()); } () = self.stop.cancelled() => {}
} }
return Ok(());
} }
let mut processed_activities = 0; let mut processed_activities = 0;
'batch: while id < latest_id while id < latest_id
&& processed_activities < CHECK_SAVE_STATE_EVERY_IT && processed_activities < CHECK_SAVE_STATE_EVERY_IT
&& !stop.is_cancelled() && !self.stop.is_cancelled()
{ {
id += 1; id += 1;
processed_activities += 1; processed_activities += 1;
let Some(ele) = get_activity_cached(pool, id).await? else { let Some(ele) = get_activity_cached(pool, id).await? else {
state.last_successful_id = id; self.state.last_successful_id = id;
continue; continue;
}; };
let (activity, object) = (&ele.0, &ele.1); self.send_retry_loop(pool, &ele.0, &ele.1).await?;
let inbox_urls = get_inbox_urls(&instance, &site, &followed_communities, activity); if self.stop.is_cancelled() {
if inbox_urls.is_empty() { return Ok(());
state.last_successful_id = id;
continue;
} }
let Some(actor_apub_id) = &activity.actor_apub_id else { // send success!
continue; // activity was inserted before persistent queue was activated self.state.last_successful_id = id;
}; self.state.fail_count = 0;
let actor = get_actor_cached(pool, activity.actor_type, actor_apub_id).await?; }
Ok(())
}
let inbox_urls = inbox_urls.into_iter().collect(); /** this function will only return if (a) send succeeded or (b) worker cancelled */
let requests = prepare_raw(object, actor.as_ref(), inbox_urls, &data) async fn send_retry_loop(
.await &mut self,
.into_anyhow()?; pool: &mut DbPool<'_>,
for task in requests { activity: &SentActivity,
// usually only one due to shared inbox object: &SharedInboxActivities,
let mut req = sign_raw(&task, &data, REQWEST_TIMEOUT).await?; ) -> Result<()> {
tracing::info!("sending out {}", task); let inbox_urls = self.get_inbox_urls(activity);
while let Err(e) = send_raw(&task, &data, req).await { if inbox_urls.is_empty() {
state.fail_count += 1; self.state.last_successful_id = activity.id;
state.last_retry = Utc::now(); return Ok(());
let retry_delay: Duration = retry_sleep_duration(state.fail_count); }
tracing::info!( let Some(actor_apub_id) = &activity.actor_apub_id else {
"{}: retrying {id} attempt {} with delay {retry_delay:.2?}. ({e})", return Ok(()); // activity was inserted before persistent queue was activated
instance.domain, };
state.fail_count let actor = get_actor_cached(pool, activity.actor_type, actor_apub_id).await?;
);
stats_sender.send(state.clone())?; let inbox_urls = inbox_urls.into_iter().collect();
FederationQueueState::upsert(pool, &state).await?; let requests = prepare_raw(object, actor.as_ref(), inbox_urls, &self.context)
req = sign_raw(&task, &data, REQWEST_TIMEOUT).await?; // resign request .await
tokio::select! { .into_anyhow()?;
() = sleep(retry_delay) => {}, for task in requests {
() = stop.cancelled() => { // usually only one due to shared inbox
// save state to db and exit let mut req = sign_raw(&task, &self.context, REQWEST_TIMEOUT).await?;
break 'batch; tracing::info!("sending out {}", task);
} while let Err(e) = send_raw(&task, &self.context, req).await {
self.state.fail_count += 1;
self.state.last_retry = Utc::now();
let retry_delay: Duration = retry_sleep_duration(self.state.fail_count);
tracing::info!(
"{}: retrying {} attempt {} with delay {retry_delay:.2?}. ({e})",
self.instance.domain,
activity.id,
self.state.fail_count
);
self.stats_sender.send(self.state.clone())?;
FederationQueueState::upsert(pool, &self.state).await?;
req = sign_raw(&task, &self.context, REQWEST_TIMEOUT).await?; // resign request
tokio::select! {
() = sleep(retry_delay) => {},
() = self.stop.cancelled() => {
// save state to db and exit
return Ok(());
} }
} }
} }
// send success!
state.last_successful_id = id;
state.fail_count = 0;
} }
Ok(())
}
if Utc::now() - last_state_insert /// get inbox urls of sending the given activity to the given instance
> chrono::Duration::from_std(SAVE_STATE_EVERY_TIME).expect("not negative") /// most often this will return 0 values (if instance doesn't care about the activity)
{ /// or 1 value (the shared inbox)
last_state_insert = Utc::now(); /// > 1 values only happens for non-lemmy software
FederationQueueState::upsert(pool, &state).await?; fn get_inbox_urls(&self, activity: &SentActivity) -> HashSet<Url> {
stats_sender.send(state.clone())?; let mut inbox_urls: HashSet<Url> = HashSet::new();
}
{ if activity.send_all_instances {
// update communities if let Some(site) = &self.site {
if (Utc::now() - last_incremental_communities_fetch) > chrono::Duration::seconds(10) { // Nutomic: Most non-lemmy software wont have a site row. That means it cant handle these activities. So handling it like this is fine.
// process additions every 10s inbox_urls.insert(site.inbox_url.inner().clone());
followed_communities.extend(
get_communities(pool, instance.id, &mut last_incremental_communities_fetch).await?,
);
}
if (Utc::now() - last_full_communities_fetch) > chrono::Duration::seconds(300) {
// process removals every 5min
last_full_communities_fetch = Utc.timestamp_nanos(0);
followed_communities =
get_communities(pool, instance.id, &mut last_full_communities_fetch).await?;
last_incremental_communities_fetch = last_full_communities_fetch;
} }
} }
} for t in &activity.send_community_followers_of {
if let Some(urls) = self.followed_communities.get(t) {
Ok(()) inbox_urls.extend(urls.iter().map(std::clone::Clone::clone));
} }
/// get inbox urls of sending the given activity to the given instance
/// most often this will return 0 values (if instance doesn't care about the activity)
/// or 1 value (the shared inbox)
/// > 1 values only happens for non-lemmy software
fn get_inbox_urls(
instance: &Instance,
site: &Option<Site>,
followed_communities: &HashMap<CommunityId, HashSet<Url>>,
activity: &SentActivity,
) -> HashSet<Url> {
let mut inbox_urls: HashSet<Url> = HashSet::new();
if activity.send_all_instances {
if let Some(site) = &site {
// Nutomic: Most non-lemmy software wont have a site row. That means it cant handle these activities. So handling it like this is fine.
inbox_urls.insert(site.inbox_url.inner().clone());
} }
} for inbox in &activity.send_inboxes {
for t in &activity.send_community_followers_of { if inbox.domain() != Some(&self.instance.domain) {
if let Some(urls) = followed_communities.get(t) { continue;
inbox_urls.extend(urls.iter().map(std::clone::Clone::clone)); }
inbox_urls.insert(inbox.inner().clone());
} }
inbox_urls
} }
for inbox in &activity.send_inboxes {
if inbox.domain() != Some(&instance.domain) {
continue;
}
inbox_urls.insert(inbox.inner().clone());
}
inbox_urls
}
/// get a list of local communities with the remote inboxes on the given instance that cares about them async fn update_communities(&mut self, pool: &mut DbPool<'_>) -> Result<()> {
async fn get_communities( if (Utc::now() - self.last_full_communities_fetch) > chrono::Duration::seconds(300) {
pool: &mut DbPool<'_>, // process removals every 5min
instance_id: InstanceId, self.last_full_communities_fetch = Utc.timestamp_nanos(0);
last_fetch: &mut DateTime<Utc>, (self.followed_communities, self.last_full_communities_fetch) = self
) -> Result<HashMap<CommunityId, HashSet<Url>>> { .get_communities(pool, self.instance.id, self.last_full_communities_fetch)
let e = *last_fetch; .await?;
*last_fetch = Utc::now(); // update to time before fetch to ensure overlap self.last_incremental_communities_fetch = self.last_full_communities_fetch;
Ok( }
CommunityFollowerView::get_instance_followed_community_inboxes(pool, instance_id, e) if (Utc::now() - self.last_incremental_communities_fetch) > chrono::Duration::seconds(10) {
.await? let (news, time) = self
.into_iter() .get_communities(
.fold(HashMap::new(), |mut map, (c, u)| { pool,
map.entry(c).or_insert_with(HashSet::new).insert(u.into()); self.instance.id,
map self.last_incremental_communities_fetch,
}), )
) .await?;
// process additions every 10s
self.followed_communities.extend(news);
self.last_incremental_communities_fetch = time;
}
Ok(())
}
/// get a list of local communities with the remote inboxes on the given instance that cares about them
async fn get_communities(
&mut self,
pool: &mut DbPool<'_>,
instance_id: InstanceId,
last_fetch: DateTime<Utc>,
) -> Result<(HashMap<CommunityId, HashSet<Url>>, DateTime<Utc>)> {
let new_last_fetch = Utc::now(); // update to time before fetch to ensure overlap
Ok((
CommunityFollowerView::get_instance_followed_community_inboxes(pool, instance_id, last_fetch)
.await?
.into_iter()
.fold(HashMap::new(), |mut map, (c, u)| {
map.entry(c).or_insert_with(HashSet::new).insert(u.into());
map
}),
new_last_fetch,
))
}
} }