Post view: move cursor pagination to separate library, add backward pagination to PostQuery (#4320)

* stuff

* stuff

* crates.io

* Update up.sql

* Rerun federation tests

* Update post_view.rs

* Update post_view.rs

* Update up.sql

* Update utils.rs

* Fix precision loss

* Update up.sql

* Update down.sql

* remove unwrap

* Update post_view.rs

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
upgrade_deps_18
dullbananas 2024-01-24 08:50:11 -07:00 committed by GitHub
parent 759f6d8a9a
commit d8f9e8a64c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 195 additions and 139 deletions

22
Cargo.lock generated
View File

@ -2298,6 +2298,26 @@ dependencies = [
"tokio-native-tls",
]
[[package]]
name = "i-love-jesus"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39fa60e3281e1529cc56d96cca925215f51f9b39a96bc677982fbfdf2663cc84"
dependencies = [
"diesel",
"i-love-jesus-macros",
]
[[package]]
name = "i-love-jesus-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8215279f83f9b829403812f845aa2d0dd5966332aa2fd0334a375256f3dd0322"
dependencies = [
"quote",
"syn 2.0.40",
]
[[package]]
name = "iana-time-zone"
version = "0.1.58"
@ -2683,6 +2703,7 @@ dependencies = [
"diesel_ltree",
"diesel_migrations",
"futures-util",
"i-love-jesus",
"lemmy_utils",
"once_cell",
"pretty_assertions",
@ -2713,6 +2734,7 @@ dependencies = [
"diesel",
"diesel-async",
"diesel_ltree",
"i-love-jesus",
"lemmy_db_schema",
"lemmy_utils",
"pretty_assertions",

View File

@ -157,6 +157,7 @@ tokio-postgres = "0.7.10"
tokio-postgres-rustls = "0.10.0"
enum-map = "2.7"
moka = { version = "0.12.1", features = ["future"] }
i-love-jesus = { version = "0.1.0" }
clap = { version = "4.4.11", features = ["derive"] }
pretty_assertions = "1.4.0"

View File

@ -36,6 +36,7 @@ full = [
"tokio-postgres",
"tokio-postgres-rustls",
"rustls",
"i-love-jesus",
]
[dependencies]
@ -76,6 +77,7 @@ tokio-postgres = { workspace = true, optional = true }
tokio-postgres-rustls = { workspace = true, optional = true }
rustls = { workspace = true, optional = true }
uuid = { workspace = true, features = ["v4"] }
i-love-jesus = { workspace = true, optional = true }
anyhow = { workspace = true }
[dev-dependencies]

View File

@ -9,6 +9,8 @@ use crate::schema::{
site_aggregates,
};
use chrono::{DateTime, Utc};
#[cfg(feature = "full")]
use i_love_jesus::CursorKeysModule;
use serde::{Deserialize, Serialize};
#[cfg(feature = "full")]
use ts_rs::TS;
@ -93,13 +95,21 @@ pub struct PersonAggregates {
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(
feature = "full",
derive(Queryable, Selectable, Associations, Identifiable, TS)
derive(
Queryable,
Selectable,
Associations,
Identifiable,
TS,
CursorKeysModule
)
)]
#[cfg_attr(feature = "full", diesel(table_name = post_aggregates))]
#[cfg_attr(feature = "full", diesel(belongs_to(crate::source::post::Post)))]
#[cfg_attr(feature = "full", diesel(primary_key(post_id)))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", ts(export))]
#[cfg_attr(feature = "full", cursor_keys_module(name = post_aggregates_keys))]
/// Aggregate data for a post.
pub struct PostAggregates {
pub post_id: PostId,

View File

@ -18,7 +18,7 @@ use diesel::{
query_dsl::methods::LimitDsl,
result::{ConnectionError, ConnectionResult, Error as DieselError, Error::QueryBuilderError},
serialize::{Output, ToSql},
sql_types::{Text, Timestamptz},
sql_types::{self, Text, Timestamptz},
IntoSql,
PgConnection,
};
@ -32,6 +32,7 @@ use diesel_async::{
};
use diesel_migrations::EmbeddedMigrations;
use futures_util::{future::BoxFuture, Future, FutureExt};
use i_love_jesus::CursorKey;
use lemmy_utils::{
error::{LemmyError, LemmyErrorExt, LemmyErrorType},
settings::SETTINGS,
@ -153,6 +154,25 @@ macro_rules! try_join_with_pool {
}};
}
pub struct ReverseTimestampKey<K>(pub K);
impl<K, C> CursorKey<C> for ReverseTimestampKey<K>
where
K: CursorKey<C, SqlType = Timestamptz>,
{
type SqlType = sql_types::BigInt;
type CursorValue = functions::reverse_timestamp_sort::HelperType<K::CursorValue>;
type SqlValue = functions::reverse_timestamp_sort::HelperType<K::SqlValue>;
fn get_cursor_value(cursor: &C) -> Self::CursorValue {
functions::reverse_timestamp_sort(K::get_cursor_value(cursor))
}
fn get_sql_value() -> Self::SqlValue {
functions::reverse_timestamp_sort(K::get_sql_value())
}
}
/// Includes an SQL comment before `T`, which can be used to label auto_explain output
#[derive(QueryId)]
pub struct Commented<T> {
@ -424,6 +444,8 @@ pub mod functions {
fn controversy_rank(upvotes: BigInt, downvotes: BigInt, score: BigInt) -> Double;
}
sql_function!(fn reverse_timestamp_sort(time: Timestamptz) -> BigInt);
sql_function!(fn lower(x: Text) -> Text);
// really this function is variadic, this just adds the two-argument version

View File

@ -23,6 +23,7 @@ full = [
"tracing",
"ts-rs",
"actix-web",
"i-love-jesus",
"lemmy_db_schema/full",
]
@ -37,6 +38,7 @@ serde_with = { workspace = true }
tracing = { workspace = true, optional = true }
ts-rs = { workspace = true, optional = true }
actix-web = { workspace = true, optional = true }
i-love-jesus = { workspace = true, optional = true }
[dev-dependencies]
serial_test = { workspace = true }

View File

@ -3,6 +3,7 @@ use diesel::{
debug_query,
dsl::{exists, not, IntervalDsl},
pg::Pg,
query_builder::AsQuery,
result::Error,
sql_types,
BoolExpressionMethods,
@ -16,8 +17,9 @@ use diesel::{
QueryDsl,
};
use diesel_async::RunQueryDsl;
use i_love_jesus::PaginatedQueryBuilder;
use lemmy_db_schema::{
aggregates::structs::PostAggregates,
aggregates::structs::{post_aggregates_keys as key, PostAggregates},
newtypes::{CommunityId, LocalUserId, PersonId, PostId},
schema::{
community,
@ -49,40 +51,13 @@ use lemmy_db_schema::{
ListFn,
Queries,
ReadFn,
ReverseTimestampKey,
},
ListingType,
SortType,
};
use tracing::debug;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Ord {
Desc,
Asc,
}
struct PaginationCursorField<Q, QS> {
then_order_by_desc: fn(Q) -> Q,
then_order_by_asc: fn(Q) -> Q,
le: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
ge: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
ne: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
}
/// Returns `PaginationCursorField<_, _>` for the given name
macro_rules! field {
($name:ident) => {
// Type inference doesn't work if normal method call syntax is used
PaginationCursorField {
then_order_by_desc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.desc()),
then_order_by_asc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.asc()),
le: |e| Box::new(post_aggregates::$name.le(e.$name)),
ge: |e| Box::new(post_aggregates::$name.ge(e.$name)),
ne: |e| Box::new(post_aggregates::$name.ne(e.$name)),
}
};
}
fn queries<'a>() -> Queries<
impl ReadFn<'a, PostView, (PostId, Option<PersonId>, bool)>,
impl ListFn<'a, PostView, PostQuery<'a>>,
@ -462,110 +437,71 @@ fn queries<'a>() -> Queries<
query = query.filter(not(is_creator_blocked(person_id)));
}
let featured_field = if options.community_id.is_none() || options.community_id_just_for_prefetch
{
field!(featured_local)
} else {
field!(featured_community)
};
let (main_sort, top_sort_interval) = match options.sort.unwrap_or(SortType::Hot) {
SortType::Active => ((Ord::Desc, field!(hot_rank_active)), None),
SortType::Hot => ((Ord::Desc, field!(hot_rank)), None),
SortType::Scaled => ((Ord::Desc, field!(scaled_rank)), None),
SortType::Controversial => ((Ord::Desc, field!(controversy_rank)), None),
SortType::New => ((Ord::Desc, field!(published)), None),
SortType::Old => ((Ord::Asc, field!(published)), None),
SortType::NewComments => ((Ord::Desc, field!(newest_comment_time)), None),
SortType::MostComments => ((Ord::Desc, field!(comments)), None),
SortType::TopAll => ((Ord::Desc, field!(score)), None),
SortType::TopYear => ((Ord::Desc, field!(score)), Some(1.years())),
SortType::TopMonth => ((Ord::Desc, field!(score)), Some(1.months())),
SortType::TopWeek => ((Ord::Desc, field!(score)), Some(1.weeks())),
SortType::TopDay => ((Ord::Desc, field!(score)), Some(1.days())),
SortType::TopHour => ((Ord::Desc, field!(score)), Some(1.hours())),
SortType::TopSixHour => ((Ord::Desc, field!(score)), Some(6.hours())),
SortType::TopTwelveHour => ((Ord::Desc, field!(score)), Some(12.hours())),
SortType::TopThreeMonths => ((Ord::Desc, field!(score)), Some(3.months())),
SortType::TopSixMonths => ((Ord::Desc, field!(score)), Some(6.months())),
SortType::TopNineMonths => ((Ord::Desc, field!(score)), Some(9.months())),
};
if let Some(interval) = top_sort_interval {
query = query.filter(post_aggregates::published.gt(now() - interval));
}
let sorts = [
// featured posts first
Some((Ord::Desc, featured_field)),
// then use the main sort
Some(main_sort),
// hot rank reaches zero after some days, use publish as fallback. necessary because old
// posts can be fetched over federation and inserted with high post id
Some((Ord::Desc, field!(published))),
// finally use unique post id as tie breaker
Some((Ord::Desc, field!(post_id))),
];
let sorts_iter = sorts.iter().flatten();
// This loop does almost the same thing as sorting by and comparing tuples. If the rows were
// only sorted by 1 field called `foo` in descending order, then it would be like this:
//
// ```
// query = query.then_order_by(foo.desc());
// if let Some(first) = &options.page_after {
// query = query.filter(foo.le(first.foo));
// }
// if let Some(last) = &page_before_or_equal {
// query = query.filter(foo.ge(last.foo));
// }
// ```
//
// If multiple rows have the same value for a sorted field, then they are
// grouped together, and the rows in that group are sorted by the next fields.
// When checking if a row is within the range determined by the cursors, a field
// that's sorted after other fields is only compared if the row and the cursor
// are in the same group created by the previous sort, which is checked by using
// `or` to skip the comparison if any previously sorted field is not equal.
for (i, (order, field)) in sorts_iter.clone().enumerate() {
// Both cursors are treated as inclusive here. `page_after` is made exclusive
// by adding `1` to the offset.
let (then_order_by_field, compare_first, compare_last) = match order {
Ord::Desc => (field.then_order_by_desc, field.le, field.ge),
Ord::Asc => (field.then_order_by_asc, field.ge, field.le),
};
query = then_order_by_field(query);
for (cursor_data, compare) in [
(&options.page_after, compare_first),
(&options.page_before_or_equal, compare_last),
] {
let Some(cursor_data) = cursor_data else {
continue;
};
let mut condition: Box<dyn BoxableExpression<_, Pg, SqlType = sql_types::Bool>> =
Box::new(compare(&cursor_data.0));
// For each field that was sorted before the current one, skip the filter by changing
// `condition` to `true` if the row's value doesn't equal the cursor's value.
for (_, other_field) in sorts_iter.clone().take(i) {
condition = Box::new(condition.or((other_field.ne)(&cursor_data.0)));
}
query = query.filter(condition);
}
}
let (limit, mut offset) = limit_and_offset(options.page, options.limit)?;
if options.page_after.is_some() {
// always skip exactly one post because that's the last post of the previous page
// fixing the where clause is more difficult because we'd have to change only the last order-by-where clause
// e.g. WHERE (featured_local<=, hot_rank<=, published<=) to WHERE (<=, <=, <)
offset = 1;
}
let (limit, offset) = limit_and_offset(options.page, options.limit)?;
query = query.limit(limit).offset(offset);
let mut query = PaginatedQueryBuilder::new(query);
let page_after = options.page_after.map(|c| c.0);
let page_before_or_equal = options.page_before_or_equal.map(|c| c.0);
if options.page_back {
query = query
.before(page_after)
.after_or_equal(page_before_or_equal)
.limit_and_offset_from_end();
} else {
query = query
.after(page_after)
.before_or_equal(page_before_or_equal);
}
// featured posts first
query = if options.community_id.is_none() || options.community_id_just_for_prefetch {
query.then_desc(key::featured_local)
} else {
query.then_desc(key::featured_community)
};
let time = |interval| post_aggregates::published.gt(now() - interval);
// then use the main sort
query = match options.sort.unwrap_or(SortType::Hot) {
SortType::Active => query.then_desc(key::hot_rank_active),
SortType::Hot => query.then_desc(key::hot_rank),
SortType::Scaled => query.then_desc(key::scaled_rank),
SortType::Controversial => query.then_desc(key::controversy_rank),
SortType::New => query.then_desc(key::published),
SortType::Old => query.then_desc(ReverseTimestampKey(key::published)),
SortType::NewComments => query.then_desc(key::newest_comment_time),
SortType::MostComments => query.then_desc(key::comments),
SortType::TopAll => query.then_desc(key::score),
SortType::TopYear => query.then_desc(key::score).filter(time(1.years())),
SortType::TopMonth => query.then_desc(key::score).filter(time(1.months())),
SortType::TopWeek => query.then_desc(key::score).filter(time(1.weeks())),
SortType::TopDay => query.then_desc(key::score).filter(time(1.days())),
SortType::TopHour => query.then_desc(key::score).filter(time(1.hours())),
SortType::TopSixHour => query.then_desc(key::score).filter(time(6.hours())),
SortType::TopTwelveHour => query.then_desc(key::score).filter(time(12.hours())),
SortType::TopThreeMonths => query.then_desc(key::score).filter(time(3.months())),
SortType::TopSixMonths => query.then_desc(key::score).filter(time(6.months())),
SortType::TopNineMonths => query.then_desc(key::score).filter(time(9.months())),
};
// use publish as fallback. especially useful for hot rank which reaches zero after some days.
// necessary because old posts can be fetched over federation and inserted with high post id
query = match options.sort.unwrap_or(SortType::Hot) {
// A second time-based sort would not be very useful
SortType::New | SortType::Old | SortType::NewComments => query,
_ => query.then_desc(key::published),
};
// finally use unique post id as tie breaker
query = query.then_desc(key::post_id);
// Not done by debug_query
let query = query.as_query();
debug!("Post View Query: {:?}", debug_query::<Pg, _>(&query));
Commented::new(query)
@ -642,6 +578,7 @@ pub struct PostQuery<'a> {
pub limit: Option<i64>,
pub page_after: Option<PaginationCursorData>,
pub page_before_or_equal: Option<PaginationCursorData>,
pub page_back: bool,
}
impl<'a> PostQuery<'a> {
@ -709,9 +646,15 @@ impl<'a> PostQuery<'a> {
if (v.len() as i64) < limit {
Ok(Some(self.clone()))
} else {
let page_before_or_equal = Some(PaginationCursorData(v.pop().expect("else case").counts));
let item = if self.page_back {
// for backward pagination, get first element instead
v.into_iter().next()
} else {
v.pop()
};
let limit_cursor = Some(PaginationCursorData(item.expect("else case").counts));
Ok(Some(PostQuery {
page_before_or_equal,
page_before_or_equal: limit_cursor,
..self.clone()
}))
}
@ -1388,15 +1331,19 @@ mod tests {
}
}
let options = PostQuery {
community_id: Some(inserted_community.id),
sort: Some(SortType::MostComments),
limit: Some(10),
..Default::default()
};
let mut listed_post_ids = vec![];
let mut page_after = None;
loop {
let post_listings = PostQuery {
community_id: Some(inserted_community.id),
sort: Some(SortType::MostComments),
limit: Some(10),
page_after,
..Default::default()
..options.clone()
}
.list(pool)
.await?;
@ -1410,6 +1357,34 @@ mod tests {
}
}
// Check that backward pagination matches forward pagination
let mut listed_post_ids_forward = listed_post_ids.clone();
let mut page_before = None;
loop {
let post_listings = PostQuery {
page_after: page_before,
page_back: true,
..options.clone()
}
.list(pool)
.await?;
let listed_post_ids = post_listings.iter().map(|p| p.post.id).collect::<Vec<_>>();
let index = listed_post_ids_forward.len() - listed_post_ids.len();
assert_eq!(
listed_post_ids_forward.get(index..),
listed_post_ids.get(..)
);
listed_post_ids_forward.truncate(index);
if let Some(p) = post_listings.into_iter().next() {
page_before = Some(PaginationCursorData(p.counts));
} else {
break;
}
}
inserted_post_ids.sort_unstable_by_key(|id| id.0);
listed_post_ids.sort_unstable_by_key(|id| id.0);

View File

@ -0,0 +1,4 @@
DROP INDEX idx_post_aggregates_community_published_asc, idx_post_aggregates_featured_community_published_asc, idx_post_aggregates_featured_local_published_asc, idx_post_aggregates_published_asc;
DROP FUNCTION reverse_timestamp_sort (t timestamp with time zone);

View File

@ -0,0 +1,18 @@
CREATE FUNCTION reverse_timestamp_sort (t timestamp with time zone)
RETURNS bigint
AS $$
BEGIN
RETURN (-1000000 * EXTRACT(EPOCH FROM t))::bigint;
END;
$$
LANGUAGE plpgsql
IMMUTABLE PARALLEL SAFE;
CREATE INDEX idx_post_aggregates_community_published_asc ON public.post_aggregates USING btree (community_id, featured_local DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_featured_community_published_asc ON public.post_aggregates USING btree (community_id, featured_community DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_featured_local_published_asc ON public.post_aggregates USING btree (featured_local DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_published_asc ON public.post_aggregates USING btree (reverse_timestamp_sort (published) DESC);