From b3ff055fd3ba7684b348256a4c536ffa4abe3158 Mon Sep 17 00:00:00 2001
From: Kiana Sheibani
Date: Fri, 1 Dec 2023 18:32:55 -0500
Subject: [PATCH] Use time filters instead of page switching

This change allows us to avoid the bug of only being able to access up
to 10000 tournaments before start.gg's API throws an error.
---
 Cargo.lock                       | 10 ++++
 Cargo.toml                       |  3 ++
 src/queries/scalars.rs           |  5 ++
 src/queries/tournament_events.rs | 79 ++++++++++++---------------
 src/sync.rs                      | 91 ++++++++++++++++++--------------
 5 files changed, 102 insertions(+), 86 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fdff173..3377d53 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -626,6 +626,15 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
 
+[[package]]
+name = "itertools"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.9"
@@ -1149,6 +1158,7 @@ dependencies = [
  "cynic",
  "cynic-codegen",
  "dirs",
+ "itertools",
  "reqwest",
  "schema",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 312500f..87dbb6a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,5 +26,8 @@ serde = "1.0"
 dirs = "5.0"
 sqlite = "0.31"
 
+# Other
+itertools = "0.12.0"
+
 [build-dependencies]
 cynic-codegen = "3.2"
diff --git a/src/queries/scalars.rs b/src/queries/scalars.rs
index c7b1486..04af027 100644
--- a/src/queries/scalars.rs
+++ b/src/queries/scalars.rs
@@ -39,6 +39,11 @@ impl Display for StringOrInt {
 #[repr(transparent)]
 pub struct VideogameId(pub u64);
 
+#[derive(cynic::Scalar, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cynic(graphql_type = "ID")]
+#[repr(transparent)]
+pub struct TournamentId(pub u64);
+
 #[derive(cynic::Scalar, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 #[cynic(graphql_type = "ID")]
 #[repr(transparent)]
diff --git a/src/queries/tournament_events.rs b/src/queries/tournament_events.rs
index cac0a80..05f25f4 100644
--- a/src/queries/tournament_events.rs
+++ b/src/queries/tournament_events.rs
@@ -7,15 +7,12 @@ use schema::schema;
 
 #[derive(cynic::QueryVariables, Debug, Copy, Clone)]
 pub struct TournamentEventsVars<'a> {
-    // HACK: This should really be an optional variable, but there seems to be a
-    // server-side bug that completely breaks everything when this isn't passed.
-    // We can use a dummy value of 1 when we don't want to filter by time.
-    pub last_sync: Timestamp,
+    pub after_date: Timestamp,
+    pub before_date: Timestamp,
     pub game_id: VideogameId,
     pub country: Option<&'a str>,
     pub state: Option<&'a str>,
-    pub page: i32,
 }
 
 // Query
 
@@ -24,12 +21,13 @@ pub struct TournamentEventsVars<'a> {
 #[cynic(graphql_type = "Query", variables = "TournamentEventsVars")]
 pub struct TournamentEvents {
     #[arguments(query: {
-        page: $page,
+        page: 1,
         perPage: 225,
-        sortBy: "endAt asc",
+        sortBy: "startAt asc",
         filter: {
             past: true,
-            afterDate: $last_sync,
+            afterDate: $after_date,
+            beforeDate: $before_date,
             videogameIds: [$game_id],
             countryCode: $country,
             addrState: $state
@@ -40,19 +38,15 @@ pub struct TournamentEvents {
 #[derive(cynic::QueryFragment, Debug)]
 #[cynic(variables = "TournamentEventsVars")]
 struct TournamentConnection {
-    page_info: Option<PageInfo>,
     #[cynic(flatten)]
     nodes: Vec<Tournament>,
 }
 
-#[derive(cynic::QueryFragment, Debug)]
-struct PageInfo {
-    total_pages: Option<i32>,
-}
-
 #[derive(cynic::QueryFragment, Debug)]
 #[cynic(variables = "TournamentEventsVars")]
 struct Tournament {
+    id: Option<TournamentId>,
+    start_at: Option<Timestamp>,
     #[arguments(limit: 99999, filter: { videogameId: [$game_id] })]
     #[cynic(flatten)]
     events: Vec<Event>,
@@ -68,14 +62,10 @@ struct Event {
 
 // Unwrap
 
-#[derive(Debug, Clone)]
-pub struct TournamentEventResponse {
-    pub pages: i32,
-    pub tournaments: Vec<TournamentData>,
-}
-
 #[derive(Debug, Clone)]
 pub struct TournamentData {
+    pub id: TournamentId,
+    pub time: Timestamp,
     pub events: Vec<EventData>,
 }
 
@@ -87,36 +77,33 @@ pub struct EventData {
 }
 
 impl<'a> QueryUnwrap<TournamentEventsVars<'a>> for TournamentEvents {
-    type Unwrapped = TournamentEventResponse;
+    type Unwrapped = Vec<TournamentData>;
 
-    fn unwrap_response(
-        response: GraphQlResponse<TournamentEvents>,
-    ) -> Option<TournamentEventResponse> {
+    fn unwrap_response(response: GraphQlResponse<TournamentEvents>) -> Option<Vec<TournamentData>> {
         let response_tournaments = response.data?.tournaments?;
 
-        let tournaments = response_tournaments
-            .nodes
-            .into_iter()
-            .filter_map(|tour| {
-                Some(TournamentData {
-                    events: tour
-                        .events
-                        .into_iter()
-                        .filter_map(|event| {
-                            Some(EventData {
-                                id: event.id?,
-                                slug: event.slug?,
-                                time: event.start_at?,
+        Some(
+            response_tournaments
+                .nodes
+                .into_iter()
+                .filter_map(|tour| {
+                    Some(TournamentData {
+                        id: tour.id?,
+                        time: tour.start_at?,
+                        events: tour
+                            .events
+                            .into_iter()
+                            .filter_map(|event| {
+                                Some(EventData {
+                                    id: event.id?,
+                                    slug: event.slug?,
+                                    time: event.start_at?,
+                                })
                             })
-                        })
-                        .collect(),
+                            .collect(),
+                    })
                 })
-            })
-            .collect::<Vec<_>>();
-
-        Some(TournamentEventResponse {
-            pages: response_tournaments.page_info?.total_pages?,
-            tournaments,
-        })
+                .collect::<Vec<_>>(),
+        )
     }
 }
diff --git a/src/sync.rs b/src/sync.rs
index 283dcca..7d9e6e9 100644
--- a/src/sync.rs
+++ b/src/sync.rs
@@ -5,6 +5,7 @@ use std::time::Duration;
 use crate::database::*;
 use crate::error;
 use crate::queries::*;
+use itertools::Itertools;
 use sqlite::*;
 
 // Glicko-2 system calculation
@@ -118,62 +119,72 @@ fn get_event_sets(event: EventId, auth: &str) -> Option<Vec<SetData>> {
     }
 }
 
-fn get_tournament_events(metadata: &DatasetMetadata, auth: &str) -> Option<Vec<EventData>> {
+fn get_tournament_events(
+    metadata: &DatasetMetadata,
+    current_time: Timestamp,
+    auth: &str,
+) -> Option<Vec<EventData>> {
     println!("Accessing tournaments...");
 
+    let mut after = metadata.last_sync;
+
     let tour_response = run_query::<TournamentEvents, _>(
         TournamentEventsVars {
-            last_sync: metadata.last_sync,
+            after_date: after,
+            before_date: current_time,
             game_id: metadata.game_id,
             country: metadata.country.as_deref(),
             state: metadata.state.as_deref(),
-            page: 1,
         },
         auth,
     )?;
 
-    let pages = tour_response.pages;
-    if pages == 0 {
-        Some(vec![])
-    } else if pages == 1 {
-        Some(
-            tour_response
-                .tournaments
-                .into_iter()
-                .flat_map(|tour| tour.events)
-                .collect::<Vec<_>>(),
-        )
+    let mut cont = !tour_response.is_empty();
+    after = if tour_response.iter().any(|tour| tour.time != after) {
+        tour_response.last().unwrap().time
     } else {
-        let mut tournaments = tour_response
-            .tournaments
-            .into_iter()
-            .flat_map(|tour| tour.events)
-            .collect::<Vec<_>>();
+        Timestamp(after.0 + 1)
+    };
 
-        for page in 2..=pages {
-            println!("  (Page {})", page);
+    let mut tournaments = tour_response;
 
-            let next_response = run_query::<TournamentEvents, _>(
-                TournamentEventsVars {
-                    last_sync: metadata.last_sync,
-                    game_id: metadata.game_id,
-                    country: metadata.country.as_deref(),
-                    state: metadata.state.as_deref(),
-                    page,
-                },
-                auth,
-            )?;
+    let mut page: u64 = 1;
+    while cont {
+        page += 1;
+        println!("  (Page {})", page);
 
-            tournaments.extend(
-                next_response
-                    .tournaments
-                    .into_iter()
-                    .flat_map(|tour| tour.events),
-            );
-        }
+        let next_response = run_query::<TournamentEvents, _>(
+            TournamentEventsVars {
+                after_date: after,
+                before_date: current_time,
+                game_id: metadata.game_id,
+                country: metadata.country.as_deref(),
+                state: metadata.state.as_deref(),
+            },
+            auth,
+        )?;
 
-        Some(tournaments)
+        cont = !next_response.is_empty();
+        after = if next_response.iter().any(|tour| tour.time != after) {
+            next_response.last().unwrap().time
+        } else {
+            Timestamp(after.0 + 1)
+        };
+
+        tournaments.extend(next_response);
     }
+
+    println!("Deduplicating...");
+
+    Some(
+        tournaments
+            .into_iter()
+            .group_by(|tour| tour.time)
+            .into_iter()
+            .flat_map(|(_, group)| group.into_iter().unique_by(|tour| tour.id))
+            .flat_map(|tour| tour.events)
+            .collect::<Vec<_>>(),
+    )
 }
 
 // Dataset syncing
@@ -292,7 +303,7 @@ pub fn sync_dataset(
     current_time: Timestamp,
     auth: &str,
 ) -> sqlite::Result<()> {
-    let events = get_tournament_events(&metadata, auth)
+    let events = get_tournament_events(&metadata, current_time, auth)
         .unwrap_or_else(|| error("Could not access start.gg", 1));
 
     connection.execute("BEGIN;")?;
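
The sketch below is illustrative and not part of the patch above. It restates the idea behind the new get_tournament_events loop with simplified stand-in types: instead of stepping through result pages (which start.gg stops serving after 10000 tournaments), the sync walks forward in time, repeatedly querying the window between the last seen start time and the current time, and deduplicating tournaments that show up in overlapping windows. Here fetch_page is a hypothetical placeholder for run_query::<TournamentEvents, _>, TournamentData is trimmed down to an id and a start time, and the deduplication uses a HashSet where the patch uses itertools' group_by and unique_by before flattening each tournament's events.

// Illustrative sketch: time-cursor pagination with dedup (not part of the patch).
use std::collections::HashSet;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct TournamentId(u64);

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct Timestamp(u64);

struct TournamentData {
    id: TournamentId,
    time: Timestamp,
}

// Stand-in for one start.gg query: tournaments starting after `after` and
// before `before`, sorted by start time ascending, capped at one page.
fn fetch_page(after: Timestamp, before: Timestamp) -> Vec<TournamentData> {
    unimplemented!("placeholder for the real API call")
}

fn collect_all(last_sync: Timestamp, now: Timestamp) -> Vec<TournamentData> {
    let mut after = last_sync;
    let mut seen = HashSet::new();
    let mut out = Vec::new();

    loop {
        let page = fetch_page(after, now);
        if page.is_empty() {
            break;
        }

        // Advance the cursor to the newest start time in this page. If every
        // result shares the cursor's timestamp, bump it by one so the loop
        // cannot stall on a single timestamp.
        after = if page.iter().any(|t| t.time != after) {
            page.last().unwrap().time
        } else {
            Timestamp(after.0 + 1)
        };

        // Overlapping time windows can return the same tournament twice, so
        // keep only the first occurrence of each ID.
        out.extend(page.into_iter().filter(|t| seen.insert(t.id)));
    }

    out
}

Advancing a timestamp cursor instead of a page number keeps every request anchored at page 1, which is what avoids the 10000-result cutoff described in the commit message.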