diff --git a/Cargo.lock b/Cargo.lock index fdff173..3377d53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -626,6 +626,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -1149,6 +1158,7 @@ dependencies = [ "cynic", "cynic-codegen", "dirs", + "itertools", "reqwest", "schema", "serde", diff --git a/Cargo.toml b/Cargo.toml index 312500f..87dbb6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,5 +26,8 @@ serde = "1.0" dirs = "5.0" sqlite = "0.31" +# Other +itertools = "0.12.0" + [build-dependencies] cynic-codegen = "3.2" diff --git a/src/queries/scalars.rs b/src/queries/scalars.rs index c7b1486..04af027 100644 --- a/src/queries/scalars.rs +++ b/src/queries/scalars.rs @@ -39,6 +39,11 @@ impl Display for StringOrInt { #[repr(transparent)] pub struct VideogameId(pub u64); +#[derive(cynic::Scalar, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cynic(graphql_type = "ID")] +#[repr(transparent)] +pub struct TournamentId(pub u64); + #[derive(cynic::Scalar, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cynic(graphql_type = "ID")] #[repr(transparent)] diff --git a/src/queries/tournament_events.rs b/src/queries/tournament_events.rs index cac0a80..05f25f4 100644 --- a/src/queries/tournament_events.rs +++ b/src/queries/tournament_events.rs @@ -7,15 +7,12 @@ use schema::schema; #[derive(cynic::QueryVariables, Debug, Copy, Clone)] pub struct TournamentEventsVars<'a> { - // HACK: This should really be an optional variable, but there seems to be a - // server-side bug that completely breaks everything when this isn't passed. - // We can use a dummy value of 1 when we don't want to filter by time. - pub last_sync: Timestamp, + pub after_date: Timestamp, + pub before_date: Timestamp, pub game_id: VideogameId, pub country: Option<&'a str>, pub state: Option<&'a str>, - pub page: i32, } // Query @@ -24,12 +21,13 @@ pub struct TournamentEventsVars<'a> { #[cynic(graphql_type = "Query", variables = "TournamentEventsVars")] pub struct TournamentEvents { #[arguments(query: { - page: $page, + page: 1, perPage: 225, - sortBy: "endAt asc", + sortBy: "startAt asc", filter: { past: true, - afterDate: $last_sync, + afterDate: $after_date, + beforeDate: $before_date, videogameIds: [$game_id], countryCode: $country, addrState: $state @@ -40,19 +38,15 @@ pub struct TournamentEvents { #[derive(cynic::QueryFragment, Debug)] #[cynic(variables = "TournamentEventsVars")] struct TournamentConnection { - page_info: Option, #[cynic(flatten)] nodes: Vec, } -#[derive(cynic::QueryFragment, Debug)] -struct PageInfo { - total_pages: Option, -} - #[derive(cynic::QueryFragment, Debug)] #[cynic(variables = "TournamentEventsVars")] struct Tournament { + id: Option, + start_at: Option, #[arguments(limit: 99999, filter: { videogameId: [$game_id] })] #[cynic(flatten)] events: Vec, @@ -68,14 +62,10 @@ struct Event { // Unwrap -#[derive(Debug, Clone)] -pub struct TournamentEventResponse { - pub pages: i32, - pub tournaments: Vec, -} - #[derive(Debug, Clone)] pub struct TournamentData { + pub id: TournamentId, + pub time: Timestamp, pub events: Vec, } @@ -87,36 +77,33 @@ pub struct EventData { } impl<'a> QueryUnwrap> for TournamentEvents { - type Unwrapped = TournamentEventResponse; + type Unwrapped = Vec; - fn unwrap_response( - response: GraphQlResponse, - ) -> Option { + fn unwrap_response(response: GraphQlResponse) -> Option> { let response_tournaments = response.data?.tournaments?; - let tournaments = response_tournaments - .nodes - .into_iter() - .filter_map(|tour| { - Some(TournamentData { - events: tour - .events - .into_iter() - .filter_map(|event| { - Some(EventData { - id: event.id?, - slug: event.slug?, - time: event.start_at?, + Some( + response_tournaments + .nodes + .into_iter() + .filter_map(|tour| { + Some(TournamentData { + id: tour.id?, + time: tour.start_at?, + events: tour + .events + .into_iter() + .filter_map(|event| { + Some(EventData { + id: event.id?, + slug: event.slug?, + time: event.start_at?, + }) }) - }) - .collect(), + .collect(), + }) }) - }) - .collect::>(); - - Some(TournamentEventResponse { - pages: response_tournaments.page_info?.total_pages?, - tournaments, - }) + .collect::>(), + ) } } diff --git a/src/sync.rs b/src/sync.rs index 283dcca..7d9e6e9 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -5,6 +5,7 @@ use std::time::Duration; use crate::database::*; use crate::error; use crate::queries::*; +use itertools::Itertools; use sqlite::*; // Glicko-2 system calculation @@ -118,62 +119,72 @@ fn get_event_sets(event: EventId, auth: &str) -> Option> { } } -fn get_tournament_events(metadata: &DatasetMetadata, auth: &str) -> Option> { +fn get_tournament_events( + metadata: &DatasetMetadata, + current_time: Timestamp, + auth: &str, +) -> Option> { println!("Accessing tournaments..."); + let mut after = metadata.last_sync; + let tour_response = run_query::( TournamentEventsVars { - last_sync: metadata.last_sync, + after_date: after, + before_date: current_time, game_id: metadata.game_id, country: metadata.country.as_deref(), state: metadata.state.as_deref(), - page: 1, }, auth, )?; - let pages = tour_response.pages; - if pages == 0 { - Some(vec![]) - } else if pages == 1 { - Some( - tour_response - .tournaments - .into_iter() - .flat_map(|tour| tour.events) - .collect::>(), - ) + let mut cont = !tour_response.is_empty(); + after = if tour_response.iter().any(|tour| tour.time != after) { + tour_response.last().unwrap().time } else { - let mut tournaments = tour_response - .tournaments - .into_iter() - .flat_map(|tour| tour.events) - .collect::>(); + Timestamp(after.0 + 1) + }; - for page in 2..=pages { - println!(" (Page {})", page); + let mut tournaments = tour_response; - let next_response = run_query::( - TournamentEventsVars { - last_sync: metadata.last_sync, - game_id: metadata.game_id, - country: metadata.country.as_deref(), - state: metadata.state.as_deref(), - page, - }, - auth, - )?; + let mut page: u64 = 1; + while cont { + page += 1; + println!(" (Page {})", page); - tournaments.extend( - next_response - .tournaments - .into_iter() - .flat_map(|tour| tour.events), - ); - } + let next_response = run_query::( + TournamentEventsVars { + after_date: after, + before_date: current_time, + game_id: metadata.game_id, + country: metadata.country.as_deref(), + state: metadata.state.as_deref(), + }, + auth, + )?; - Some(tournaments) + cont = !next_response.is_empty(); + after = if next_response.iter().any(|tour| tour.time != after) { + next_response.last().unwrap().time + } else { + Timestamp(after.0 + 1) + }; + + tournaments.extend(next_response); } + + println!("Deduplicating..."); + + Some( + tournaments + .into_iter() + .group_by(|tour| tour.time) + .into_iter() + .flat_map(|(_, group)| group.into_iter().unique_by(|tour| tour.id)) + .flat_map(|tour| tour.events) + .collect::>(), + ) } // Dataset syncing @@ -292,7 +303,7 @@ pub fn sync_dataset( current_time: Timestamp, auth: &str, ) -> sqlite::Result<()> { - let events = get_tournament_events(&metadata, auth) + let events = get_tournament_events(&metadata, current_time, auth) .unwrap_or_else(|| error("Could not access start.gg", 1)); connection.execute("BEGIN;")?;