From 4b44b8253111fe82513b08713263a2b6d91de7fa Mon Sep 17 00:00:00 2001 From: Kiana Sheibani Date: Sat, 17 Aug 2024 21:54:57 -0400 Subject: [PATCH] feat!: overhaul the entire rating algorithm I am far, FAR too lazy to split this into multiple commits, so here it is. --- src/database.rs | 456 +++++++++++++++++------------------------------- src/main.rs | 301 +++++++++++++++----------------- src/sync.rs | 218 +++-------------------- src/util.rs | 1 + 4 files changed, 328 insertions(+), 648 deletions(-) diff --git a/src/database.rs b/src/database.rs index d6eec7e..e6afaf1 100644 --- a/src/database.rs +++ b/src/database.rs @@ -1,6 +1,5 @@ use crate::queries::*; use sqlite::*; -use std::fs::{self, OpenOptions}; use std::path::{Path, PathBuf}; pub struct DatasetMetadata { @@ -14,15 +13,14 @@ pub struct DatasetMetadata { pub country: Option, pub state: Option, - pub set_limit: u64, - pub decay_rate: f64, - pub adj_decay_rate: f64, - pub period: f64, - pub tau: f64, + pub decay_const: f64, + pub var_const: f64, } /// Return the path to the datasets file. fn datasets_path(dir: &Path) -> std::io::Result { + use std::fs::{self, OpenOptions}; + let mut path = dir.to_owned(); // Create datasets path if it doesn't exist @@ -50,11 +48,8 @@ CREATE TABLE IF NOT EXISTS datasets ( game_slug TEXT NOT NULL, country TEXT, state TEXT, - set_limit INTEGER NOT NULL, decay_rate REAL NOT NULL, - adj_decay_rate REAL NOT NULL, - period REAL NOT NULL, - tau REAL NOT NULL + var_const REAL NOT NULL ) STRICT; CREATE TABLE IF NOT EXISTS players ( @@ -113,11 +108,8 @@ pub fn list_datasets(connection: &Connection) -> sqlite::Result("game_slug").to_owned(), country: r_.read::, _>("country").map(String::from), state: r_.read::, _>("state").map(String::from), - set_limit: r_.read::("set_limit") as u64, - decay_rate: r_.read::("decay_rate"), - adj_decay_rate: r_.read::("adj_decay_rate"), - period: r_.read::("period"), - tau: r_.read::("tau"), + decay_const: r_.read::("decay_rate"), + var_const: r_.read::("adj_decay_rate"), }, )) }) @@ -157,17 +149,14 @@ pub fn new_dataset( dataset: &str, metadata: DatasetMetadata, ) -> sqlite::Result<()> { - let query1 = r#"INSERT INTO datasets VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"#; + let query1 = r#"INSERT INTO datasets VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"#; let query2 = format!( r#"CREATE TABLE "{0}_players" ( id INTEGER PRIMARY KEY REFERENCES players, - last_played INTEGER NOT NULL, - deviation REAL NOT NULL, - volatility REAL NOT NULL, - sets_won TEXT NOT NULL, + sets_won TEXT NOT NULL DEFAULT '', sets_count_won INTEGER AS (length(sets_won) - length(replace(sets_won, ';', ''))), - sets_lost TEXT NOT NULL, + sets_lost TEXT NOT NULL DEFAULT '', sets_count_lost INTEGER AS (length(sets_lost) - length(replace(sets_lost, ';', ''))), sets TEXT AS (sets_won || sets_lost), sets_count INTEGER AS (sets_count_won + sets_count_lost) @@ -177,10 +166,12 @@ CREATE TABLE "{0}_network" ( player_A INTEGER NOT NULL, player_B INTEGER NOT NULL, advantage REAL NOT NULL, + variance REAL NOT NULL, + last_updated INTEGER NOT NULL, - sets_A TEXT NOT NULL, + sets_A TEXT NOT NULL DEFAULT '', sets_count_A INTEGER AS (length(sets_A) - length(replace(sets_A, ';', ''))), - sets_B TEXT NOT NULL, + sets_B TEXT NOT NULL DEFAULT '', sets_count_B INTEGER AS (length(sets_B) - length(replace(sets_B, ';', ''))), sets TEXT AS (sets_A || sets_B), sets_count INTEGER AS (sets_count_A + sets_count_B), @@ -208,11 +199,8 @@ CREATE INDEX "{0}_network_B" ON "{0}_network" (player_B);"#, .bind((7, &metadata.game_slug[..]))? .bind((8, metadata.country.as_deref()))? .bind((9, metadata.state.as_deref()))? - .bind((10, metadata.set_limit as i64))? - .bind((11, metadata.decay_rate))? - .bind((12, metadata.adj_decay_rate))? - .bind((13, metadata.period))? - .bind((14, metadata.tau))? + .bind((10, metadata.decay_const))? + .bind((11, metadata.var_const))? .try_for_each(|x| x.map(|_| ()))?; connection.execute(query2) @@ -242,11 +230,8 @@ pub fn get_metadata( game_slug: r_.read::<&str, _>("game_slug").to_owned(), country: r_.read::, _>("country").map(String::from), state: r_.read::, _>("state").map(String::from), - set_limit: r_.read::("set_limit") as u64, - decay_rate: r_.read::("decay_rate"), - adj_decay_rate: r_.read::("adj_decay_rate"), - period: r_.read::("period"), - tau: r_.read::("tau"), + decay_const: r_.read::("decay_rate"), + var_const: r_.read::("var_const"), }) }) .and_then(Result::ok)) @@ -290,39 +275,46 @@ pub fn add_set(connection: &Connection, set: &SetId, event: EventId) -> sqlite:: pub fn add_players( connection: &Connection, dataset: &str, - teams: &Teams, - time: Timestamp, + players: &Vec, ) -> sqlite::Result<()> { let query1 = "INSERT OR IGNORE INTO players (id, discrim, name, prefix) VALUES (?, ?, ?, ?)"; let query2 = format!( - r#"INSERT OR IGNORE INTO "{}_players" - (id, last_played, deviation, volatility, sets_won, sets_lost) - VALUES (?, ?, 2.01, 0.06, '', '')"#, + r#"INSERT OR IGNORE INTO "{}_players" (id) VALUES (?)"#, dataset ); - teams.iter().try_for_each(|team| { - team.iter().try_for_each( - |PlayerData { - id, - name, - prefix, - discrim, - }| { - let mut statement = connection.prepare(&query1)?; - statement.bind((1, id.0 as i64))?; - statement.bind((2, &discrim[..]))?; - statement.bind((3, &name[..]))?; - statement.bind((4, prefix.as_ref().map(|x| &x[..])))?; - statement.into_iter().try_for_each(|x| x.map(|_| ()))?; + players.iter().try_for_each( + |PlayerData { + id, + name, + prefix, + discrim, + }| { + let mut statement = connection.prepare(&query1)?; + statement.bind((1, id.0 as i64))?; + statement.bind((2, &discrim[..]))?; + statement.bind((3, &name[..]))?; + statement.bind((4, prefix.as_ref().map(|x| &x[..])))?; + statement.into_iter().try_for_each(|x| x.map(|_| ()))?; - statement = connection.prepare(&query2)?; - statement.bind((1, id.0 as i64))?; - statement.bind((2, time.0 as i64))?; - statement.into_iter().try_for_each(|x| x.map(|_| ())) - }, - ) - }) + statement = connection.prepare(&query2)?; + statement.bind((1, id.0 as i64))?; + statement.into_iter().try_for_each(|x| x.map(|_| ())) + }, + ) +} + +pub fn get_all_players(connection: &Connection, dataset: &str) -> sqlite::Result> { + let query = format!(r#"SELECT id FROM "{}_players""#, dataset,); + + connection + .prepare(&query)? + .into_iter() + .map(|r| { + let r_ = r?; + Ok(PlayerId(r_.read::("id") as u64)) + }) + .try_collect() } pub fn get_player(connection: &Connection, player: PlayerId) -> sqlite::Result { @@ -375,26 +367,6 @@ pub fn match_player_name(connection: &Connection, name: &str) -> sqlite::Result< .try_collect() } -pub fn get_player_rating_data( - connection: &Connection, - dataset: &str, - player: PlayerId, -) -> sqlite::Result<(f64, f64, Timestamp)> { - let query = format!( - r#"SELECT deviation, volatility, last_played FROM "{}_players" WHERE id = ?"#, - dataset - ); - - let mut statement = connection.prepare(&query)?; - statement.bind((1, player.0 as i64))?; - statement.next()?; - Ok(( - statement.read::("deviation")?, - statement.read::("volatility")?, - Timestamp(statement.read::("last_played")? as u64), - )) -} - pub fn get_player_set_counts( connection: &Connection, dataset: &str, @@ -436,27 +408,21 @@ pub fn get_matchup_set_counts( )) } -pub fn set_player_data( +pub fn set_player_set_counts( connection: &Connection, dataset: &str, player: PlayerId, - last_played: Timestamp, - deviation: f64, - volatility: f64, won: bool, set: &SetId, ) -> sqlite::Result<()> { let query = format!( - r#"UPDATE "{}_players" SET deviation = :dev, volatility = :vol, last_played = :last, - sets_won = iif(:won, sets_won || :set || ';', sets_won), - sets_lost = iif(:won, sets_lost, sets_lost || :set || ';') WHERE id = :id"#, + r#"UPDATE "{}_players" SET +sets_won = iif(:won, sets_won || :set || ';', sets_won), +sets_lost = iif(:won, sets_lost, sets_lost || :set || ';') WHERE id = :id"#, dataset ); let mut statement = connection.prepare(&query)?; - statement.bind((":dev", deviation))?; - statement.bind((":vol", volatility))?; - statement.bind((":last", last_played.0 as i64))?; statement.bind((":id", player.0 as i64))?; statement.bind((":won", if won { 1 } else { 0 }))?; statement.bind((":set", &set.0.to_string()[..]))?; @@ -464,18 +430,18 @@ pub fn set_player_data( Ok(()) } -pub fn get_advantage( +pub fn get_network_data( connection: &Connection, dataset: &str, player1: PlayerId, player2: PlayerId, -) -> sqlite::Result> { +) -> sqlite::Result> { if player1 == player2 { - return Ok(Some(0.0)); + return Ok(Some((0.0, 0.0))); } let query = format!( - r#"SELECT iif(:a > :b, -advantage, advantage) AS advantage FROM "{}_network" + r#"SELECT iif(:a > :b, -advantage, advantage) AS advantage, variance FROM "{}_network" WHERE player_A = min(:a, :b) AND player_B = max(:a, :b)"#, dataset ); @@ -484,20 +450,24 @@ pub fn get_advantage( statement.bind((":a", player1.0 as i64))?; statement.bind((":b", player2.0 as i64))?; statement.next()?; - statement.read::, _>("advantage") + Ok(statement + .read::, _>("advantage")? + .zip(statement.read::, _>("variance")?)) } -pub fn insert_advantage( +pub fn insert_network_data( connection: &Connection, dataset: &str, player1: PlayerId, player2: PlayerId, advantage: f64, + variance: f64, + time: Timestamp, ) -> sqlite::Result<()> { let query = format!( r#"INSERT INTO "{}_network" - (player_A, player_B, advantage, sets_A, sets_B) - VALUES (min(:a, :b), max(:a, :b), iif(:a > :b, -:v, :v), '', '')"#, + (player_A, player_B, advantage, variance, last_updated) + VALUES (min(:a, :b), max(:a, :b), iif(:a > :b, -:v, :v), :d, :t)"#, dataset ); @@ -505,32 +475,67 @@ pub fn insert_advantage( statement.bind((":a", player1.0 as i64))?; statement.bind((":b", player2.0 as i64))?; statement.bind((":v", advantage))?; + statement.bind((":d", variance))?; + statement.bind((":t", time.0 as i64))?; statement.into_iter().try_for_each(|x| x.map(|_| ())) } -pub fn adjust_advantages( +pub fn adjust_for_time( connection: &Connection, dataset: &str, - set: SetId, + player: PlayerId, + var_const: f64, + time: Timestamp, +) -> sqlite::Result<()> { + let query = format!( + r#"UPDATE "{0}_network" SET +variance = min(variance + :c * (:t - last_updated), 5.0), +last_updated = :t +WHERE player_A = :i OR player_B = :i"#, + dataset + ); + + let mut statement = connection.prepare(query)?; + statement.bind((":i", player.0 as i64))?; + statement.bind((":c", var_const))?; + statement.bind((":t", time.0 as i64))?; + statement.into_iter().try_for_each(|x| x.map(|_| ())) +} + +pub fn glicko_adjust( + connection: &Connection, + dataset: &str, + set: &SetId, player1: PlayerId, player2: PlayerId, + advantage: f64, + variance: f64, winner: usize, - adjust1: f64, - adjust2: f64, decay_rate: f64, ) -> sqlite::Result<()> { + let score = if winner != 0 { 1.0 } else { 0.0 }; + + let exp_val = 1.0 / (1.0 + (-advantage).exp()); + + let like_var = 1.0 / exp_val / (1.0 - exp_val); + let var_new = 1.0 / (1.0 / variance + 1.0 / like_var); + let adjust = score - exp_val; + let query1 = format!( - r#"UPDATE "{}_network" -SET advantage = advantage + iif(:pl = player_A, -:v, :v) * :d + r#"UPDATE "{}_network" SET +variance = 1.0 / (1.0 / variance + :d / :lv), +advantage = advantage + :d * iif(:pl = player_A, -:adj, :adj) + / (1.0 / variance + :d / :lv) WHERE (player_A = :pl AND player_B != :plo) OR (player_B = :pl AND player_A != :plo)"#, dataset ); let query2 = format!( - r#"UPDATE "{}_network" -SET advantage = advantage + iif(:a > :b, -:v, :v), - sets_A = iif(:w = (:a > :b), sets_A || :set || ';', sets_A), - sets_B = iif(:w = (:b > :a), sets_B || :set || ';', sets_B) + r#"UPDATE "{}_network" SET +variance = :var, +advantage = advantage + iif(:a > :b, -:adj, :adj) * :var, +sets_A = iif(:w = (:a > :b), sets_A || :set || ';', sets_A), +sets_B = iif(:w = (:b > :a), sets_B || :set || ';', sets_B) WHERE player_A = min(:a, :b) AND player_B = max(:a, :b)"#, dataset ); @@ -538,21 +543,24 @@ WHERE player_A = min(:a, :b) AND player_B = max(:a, :b)"#, let mut statement = connection.prepare(&query1)?; statement.bind((":pl", player1.0 as i64))?; statement.bind((":plo", player2.0 as i64))?; - statement.bind((":v", adjust1))?; + statement.bind((":adj", -0.5 * adjust))?; statement.bind((":d", decay_rate))?; + statement.bind((":lv", like_var))?; statement.into_iter().try_for_each(|x| x.map(|_| ()))?; statement = connection.prepare(&query1)?; statement.bind((":pl", player2.0 as i64))?; statement.bind((":plo", player1.0 as i64))?; - statement.bind((":v", adjust2))?; + statement.bind((":adj", 0.5 * adjust))?; statement.bind((":d", decay_rate))?; + statement.bind((":lv", like_var))?; statement.into_iter().try_for_each(|x| x.map(|_| ()))?; statement = connection.prepare(&query2)?; statement.bind((":a", player1.0 as i64))?; statement.bind((":b", player2.0 as i64))?; - statement.bind((":v", adjust2 - adjust1))?; + statement.bind((":adj", adjust))?; + statement.bind((":var", var_new))?; statement.bind((":w", winner as i64))?; statement.bind((":set", &set.0.to_string()[..]))?; statement.into_iter().try_for_each(|x| x.map(|_| ())) @@ -562,11 +570,11 @@ pub fn get_edges( connection: &Connection, dataset: &str, player: PlayerId, -) -> sqlite::Result> { +) -> sqlite::Result> { let query = format!( r#"SELECT iif(:pl = player_B, player_A, player_B) AS id, - iif(:pl = player_B, -advantage, advantage) AS advantage, sets_count + iif(:pl = player_B, -advantage, advantage) AS advantage, variance FROM "{}_network" WHERE player_A = :pl OR player_B = :pl"#, dataset @@ -581,7 +589,7 @@ pub fn get_edges( Ok(( PlayerId(r_.read::("id") as u64), r_.read::("advantage"), - r_.read::("sets_count") as u64, + r_.read::("variance"), )) }) .try_collect() @@ -616,20 +624,20 @@ pub fn hypothetical_advantage( dataset: &str, player1: PlayerId, player2: PlayerId, - set_limit: u64, decay_rate: f64, - adj_decay_rate: f64, -) -> sqlite::Result { +) -> sqlite::Result<(f64, f64)> { use std::collections::{HashSet, VecDeque}; // Check trivial cases - if player1 == player2 || either_isolated(connection, dataset, player1, player2)? { - return Ok(0.0); + if player1 == player2 { + return Ok((0.0, 0.0)); + } else if decay_rate < 0.05 || either_isolated(connection, dataset, player1, player2)? { + return Ok((0.0, 5.0)); } let mut visited: HashSet = HashSet::new(); - let mut queue: VecDeque<(PlayerId, Vec<(f64, f64)>)> = - VecDeque::from([(player1, Vec::from([(0.0, 1.0)]))]); + let mut queue: VecDeque<(PlayerId, Vec<(f64, f64, f64)>)> = + VecDeque::from([(player1, Vec::from([(0.0, 0.0, 1.0 / decay_rate)]))]); let mut final_paths = Vec::new(); @@ -638,7 +646,7 @@ pub fn hypothetical_advantage( let connections = get_edges(connection, dataset, visiting)?; - for (id, adv, sets) in connections + for (id, adv, var) in connections .into_iter() .filter(|(id, _, _)| !visited.contains(id)) { @@ -652,12 +660,9 @@ pub fn hypothetical_advantage( }; if rf.len() < 100 { - let decay = if sets >= set_limit { - decay_rate - } else { - adj_decay_rate - }; - let iter = paths.iter().map(|(a, d)| (a + adv, d * decay)); + let iter = paths + .iter() + .map(|(av, vr, dec)| (av + adv, vr + var, dec * decay_rate)); rf.extend(iter); rf.truncate(100); @@ -667,22 +672,23 @@ pub fn hypothetical_advantage( visited.insert(visiting); } - let max_decay = final_paths - .iter() - .map(|x| x.1) - .max_by(|d1, d2| d1.partial_cmp(d2).unwrap()); - - if let Some(mdec) = max_decay { - let sum_decay = final_paths.iter().map(|x| x.1).sum::(); - Ok(final_paths - .into_iter() - .map(|(adv, dec)| adv * dec) - .sum::() - / sum_decay - * mdec) - } else { + if final_paths.len() == 0 { // No paths found - Ok(0.0) + Ok((0.0, 5.0)) + } else { + let sum_decay: f64 = final_paths.iter().map(|(_, _, dec)| dec).sum(); + let (final_adv, final_var) = final_paths + .into_iter() + .fold((0.0, 0.0), |(av, vr), (adv, var, dec)| { + (av + adv * dec, vr + (var + adv * adv) * dec) + }); + let mut final_adv = final_adv / sum_decay; + let mut final_var = final_var / sum_decay - final_adv * final_adv; + if final_var > 5.0 { + final_adv = final_adv * (5.0 / final_var).sqrt(); + final_var = 5.0; + } + Ok((final_adv, final_var)) } } @@ -691,21 +697,12 @@ pub fn initialize_edge( dataset: &str, player1: PlayerId, player2: PlayerId, - set_limit: u64, decay_rate: f64, - adj_decay_rate: f64, -) -> sqlite::Result { - let adv = hypothetical_advantage( - connection, - dataset, - player1, - player2, - set_limit, - decay_rate, - adj_decay_rate, - )?; - insert_advantage(connection, dataset, player1, player2, adv)?; - Ok(adv) + time: Timestamp, +) -> sqlite::Result<(f64, f64)> { + let (adv, var) = hypothetical_advantage(connection, dataset, player1, player2, decay_rate)?; + insert_network_data(connection, dataset, player1, player2, adv, var, time)?; + Ok((adv, var)) } // Tests @@ -729,8 +726,7 @@ CREATE TABLE IF NOT EXISTS datasets ( set_limit INTEGER NOT NULL, decay_rate REAL NOT NULL, adj_decay_rate REAL NOT NULL, - period REAL NOT NULL, - tau REAL NOT NULL + var_const ) STRICT; CREATE TABLE IF NOT EXISTS players ( @@ -769,11 +765,8 @@ CREATE TABLE IF NOT EXISTS sets ( game_slug: String::from("test"), country: None, state: None, - set_limit: 0, - decay_rate: 0.5, - adj_decay_rate: 0.5, - period: (3600 * 24 * 30) as f64, - tau: 0.2, + decay_const: 0.5, + var_const: 0.00000001, } } @@ -787,141 +780,4 @@ CREATE TABLE IF NOT EXISTS sets ( }) .collect() } - - #[test] - fn sqlite_sanity_check() -> sqlite::Result<()> { - let test_value: i64 = 2; - - let connection = sqlite::open(":memory:")?; - connection.execute( - r#"CREATE TABLE test (a INTEGER); - INSERT INTO test VALUES (1); - INSERT INTO test VALUES (2)"#, - )?; - - let mut statement = connection.prepare("SELECT * FROM test WHERE a = ?")?; - statement.bind((1, test_value))?; - statement.next()?; - assert_eq!(statement.read::("a")?, test_value); - Ok(()) - } - - #[test] - fn test_players() -> sqlite::Result<()> { - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - - add_players(&connection, "test", &vec![players(2)], Timestamp(0))?; - - let mut statement = connection.prepare("SELECT * FROM players WHERE id = 1")?; - statement.next()?; - assert_eq!(statement.read::("id")?, 1); - assert_eq!(statement.read::("name")?, "1"); - assert_eq!(statement.read::, _>("prefix")?, None); - - Ok(()) - } - - #[test] - fn edge_insert_get() -> sqlite::Result<()> { - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - add_players(&connection, "test", &vec![players(2)], Timestamp(0))?; - - insert_advantage(&connection, "test", PlayerId(2), PlayerId(1), 1.0)?; - - assert_eq!( - get_advantage(&connection, "test", PlayerId(1), PlayerId(2))?, - Some(-1.0) - ); - assert_eq!( - get_advantage(&connection, "test", PlayerId(2), PlayerId(1))?, - Some(1.0) - ); - - Ok(()) - } - - #[test] - fn player_all_edges() -> sqlite::Result<()> { - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - add_players(&connection, "test", &vec![players(3)], Timestamp(0))?; - - insert_advantage(&connection, "test", PlayerId(2), PlayerId(1), 1.0)?; - insert_advantage(&connection, "test", PlayerId(1), PlayerId(3), 5.0)?; - - assert_eq!( - get_edges(&connection, "test", PlayerId(1))?, - [(PlayerId(2), -1.0, 0), (PlayerId(3), 5.0, 0)] - ); - assert_eq!( - get_edges(&connection, "test", PlayerId(2))?, - [(PlayerId(1), 1.0, 0)] - ); - assert_eq!( - get_edges(&connection, "test", PlayerId(3))?, - [(PlayerId(1), -5.0, 0)] - ); - Ok(()) - } - - #[test] - fn hypoth_adv_trivial() -> sqlite::Result<()> { - let num_players = 3; - - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - add_players( - &connection, - "test", - &vec![players(num_players)], - Timestamp(0), - )?; - - let metadata = metadata(); - for i in 1..=num_players { - for j in 1..=num_players { - assert_eq!( - hypothetical_advantage( - &connection, - "test", - PlayerId(i), - PlayerId(j), - metadata.set_limit, - metadata.decay_rate, - metadata.adj_decay_rate - )?, - 0.0 - ); - } - } - - Ok(()) - } - - #[test] - fn hypoth_adv1() -> sqlite::Result<()> { - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - add_players(&connection, "test", &vec![players(2)], Timestamp(0))?; - - insert_advantage(&connection, "test", PlayerId(1), PlayerId(2), 1.0)?; - - let metadata = metadata(); - assert_eq!( - hypothetical_advantage( - &connection, - "test", - PlayerId(1), - PlayerId(2), - metadata.set_limit, - metadata.decay_rate, - metadata.adj_decay_rate - )?, - 1.0 - ); - - Ok(()) - } } diff --git a/src/main.rs b/src/main.rs index 0bc3dde..a5d68d9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -81,6 +81,12 @@ created if it does not already exist." #[arg(short, long, global = true, help = "The dataset to access")] dataset: Option, }, + Ranking { + #[command(subcommand)] + subcommand: RankingSC, + #[arg(short, long, global = true, help = "The dataset to access")] + dataset: Option, + }, } #[derive(Subcommand)] @@ -106,13 +112,20 @@ enum PlayerSC { Matchup { player1: String, player2: String }, } +#[derive(Subcommand)] +enum RankingSC { + #[command(about = "Create a new ranking")] + Create, +} + fn main() { let cli = Cli::parse(); - let config_dir = cli - .config_dir - .map(|mut s| { s.push("startrnr"); s }) - .unwrap_or_else(|| dirs::config_dir().expect("Could not determine config directory")); + let config_dir = cli.config_dir.unwrap_or_else(|| { + let mut dir = dirs::config_dir().expect("Could not determine config directory"); + dir.push("startrnr"); + dir + }); let mut data_dir = dirs::data_dir().expect("Could not determine user data directory"); data_dir.push("startrnr"); @@ -224,22 +237,10 @@ fn dataset_list(connection: &Connection) { ); } } - println!(); - - if metadata.set_limit != 0 && metadata.decay_rate != metadata.adj_decay_rate { - println!("\x1b[1mSet Limit:\x1b[0m {}", metadata.set_limit); - println!( - "\x1b[1mNetwork Decay Rate:\x1b[0m {} (adjusted {})", - metadata.decay_rate, metadata.adj_decay_rate - ); - } else { - println!("\x1b[1mNetwork Decay Rate:\x1b[0m {}", metadata.decay_rate); - } println!( - "\x1b[1mRating Period:\x1b[0m {} days", - metadata.period / SECS_IN_DAY as f64 + "\n\x1b[1mNetwork Decay Constant:\x1b[0m {}", + metadata.decay_const ); - println!("\x1b[1mTau Constant:\x1b[0m {}\n", metadata.tau); } } @@ -399,33 +400,11 @@ End date (year, m/y, or m/d/y): " } }; - // Set Limit - - let mut set_limit = 0; - print!( - " -\x1b[1mSet Limit\x1b[0m -The set limit is an optional feature of the rating system that defines how many -sets must be played between two players for their rating data to be considered -trustworthy. -This value should be set low, i.e. not more than 5 or 6. - -Set limit (default 0): " - ); - let set_limit_input = read_string(); - if !set_limit_input.is_empty() { - set_limit = set_limit_input - .parse::() - .unwrap_or_else(|_| error("Input is not an integer", 1)); - } - // Advanced Options // Defaults - let mut decay_rate = 0.8; - let mut adj_decay_rate = 0.6; - let mut period_days = 40.0; - let mut tau = 0.4; + let mut decay_const = 0.9; + let mut var_const = (10.0 - 0.04) / SECS_IN_YEAR as f64 / 3.0; print!("\nConfigure advanced options? (y/n) "); if let Some('y') = read_string().chars().next() { @@ -433,87 +412,42 @@ Set limit (default 0): " print!( " -\x1b[1mNetwork Decay Rate\x1b[0m -The network decay rate is a number between 0 and 1 that controls how the -advantage network reacts to player wins and losses. If the decay rate is 1, -then it is assumed that a player's skill against one opponent always carries -over to all other opponents. If the decay rate is 0, then all player match-ups -are assumed to be independent of each other. +\x1b[1mNetwork Decay Constant\x1b[0m +The network decay constant is a number between 0 and 1 that controls how +player wins and losses propagate throughout the network. If the decay +constant is 1, then it is assumed that a player's skill against one +opponent always carries over to all other opponents. If the decay +constant is 0, then all player match-ups are assumed to be independent of +each other. -Network decay rate (default 0.8): " +Network decay constant (default 0.9): " ); - let decay_rate_input = read_string(); - if !decay_rate_input.is_empty() { - decay_rate = decay_rate_input + let decay_const_input = read_string(); + if !decay_const_input.is_empty() { + decay_const = decay_const_input .parse::() .unwrap_or_else(|_| error("Input is not a number", 1)); - if decay_rate < 0.0 || decay_rate > 1.0 { + if decay_const < 0.0 || decay_const > 1.0 { error("Input is not between 0 and 1", 1); } } - // Adjusted Decay Rate - - if set_limit != 0 { - print!( - " -\x1b[1mAdjusted Network Decay Rate\x1b[0m -If the number of sets played between two players is less than the set limit, -then this value is used instead of the regular network decay rate. -This value should be \x1b[1mlower\x1b[0m than the network decay rate. - -Adjusted network decay rate (default 0.6): " - ); - let adj_decay_rate_input = read_string(); - if !adj_decay_rate_input.is_empty() { - adj_decay_rate = adj_decay_rate_input - .parse::() - .unwrap_or_else(|_| error("Input is not a number", 1)); - if decay_rate < 0.0 || decay_rate > 1.0 { - error("Input is not between 0 and 1", 1); - } - } - } - - // Rating Period + // Variance Constant print!( " -\x1b[1mRating Period\x1b[0m -The rating period is an interval of time that dictates how player ratings change -during inactivity. Ideally the rating period should be somewhat long, long -enough to expect almost every player in the dataset to have played at least a -few sets. +\x1b[1mVariance Rate\x1b[0m +This constant determines how quickly a player's variance (the uncertainty +of their rating) increases over time. See the end of \x1b[4m\x1b]8;;http:\ +//www.glicko.net/glicko/glicko.pdf\x1b\\this paper\x1b]8;;\x1b\\\x1b[0m for details +on how to compute a good value, or you can leave it blank and a reasonable +default will be chosen. -Rating period (in days, default 40): " +Variance rate: " ); - let period_input = read_string(); - if !period_input.is_empty() { - period_days = period_input - .parse::() - .unwrap_or_else(|_| error("Input is not a number", 1)); - } - - // Tau coefficient - - print!( - " -\x1b[1mTau Constant\x1b[0m -The tau constant is an internal system constant that roughly represents how -much random chance and luck play a role in game outcomes. In games where match -results are highly predictable, and a player's skill is the sole factor for -whether they will win, the tau constant should be high (0.9 - 1.2). In games -where luck matters, and more improbable victories can occur, the tau constant -should be low (0.2 - 0.4). - -The tau constant is set low by default, since skill-based competitive video -games tend to be on the more luck-heavy side. - -Tau constant (default 0.4): " - ); - let tau_input = read_string(); - if !tau_input.is_empty() { - tau = tau_input + let var_const_input = read_string(); + if !var_const_input.is_empty() { + var_const = var_const_input .parse::() .unwrap_or_else(|_| error("Input is not a number", 1)); } @@ -533,11 +467,8 @@ Tau constant (default 0.4): " game_slug, country, state, - set_limit, - decay_rate, - adj_decay_rate, - period: SECS_IN_DAY as f64 * period_days, - tau, + decay_const, + var_const, }, ) .expect("Error communicating with SQLite"); @@ -591,9 +522,6 @@ fn player_info(connection: &Connection, dataset: Option, player: String) } = get_player_from_input(connection, player) .unwrap_or_else(|_| error("Could not find player", 1)); - let (deviation, volatility, _) = get_player_rating_data(connection, &dataset, id) - .unwrap_or_else(|_| error("Could not find player", 1)); - let (won, lost) = get_player_set_counts(connection, &dataset, id) .unwrap_or_else(|_| error("Could not find player", 1)); @@ -613,9 +541,6 @@ fn player_info(connection: &Connection, dataset: Option, player: String) lost, (won as f64 / (won + lost) as f64) * 100.0 ); - - println!("\n\x1b[1mDeviation:\x1b[0m {}", deviation); - println!("\x1b[1mVolatility:\x1b[0m {}", volatility); } fn player_matchup( @@ -634,9 +559,6 @@ fn player_matchup( } = get_player_from_input(connection, player1) .unwrap_or_else(|_| error("Could not find player", 1)); - let (deviation1, _, _) = get_player_rating_data(connection, &dataset, player1) - .unwrap_or_else(|_| error("Could not find player", 1)); - let PlayerData { id: player2, name: name2, @@ -645,42 +567,34 @@ fn player_matchup( } = get_player_from_input(connection, player2) .unwrap_or_else(|_| error("Could not find player", 1)); - let (deviation2, _, _) = get_player_rating_data(connection, &dataset, player2) - .unwrap_or_else(|_| error("Could not find player", 1)); - - let (hypothetical, advantage) = get_advantage(connection, &dataset, player1, player2) - .expect("Error communicating with SQLite") - .map(|x| (false, x)) - .unwrap_or_else(|| { - let metadata = get_metadata(connection, &dataset) - .expect("Error communicating with SQLite") - .unwrap_or_else(|| error("Dataset not found", 1)); - ( - true, - hypothetical_advantage( + let (hypothetical, advantage, variance) = + get_network_data(connection, &dataset, player1, player2) + .expect("Error communicating with SQLite") + .map(|(adv, var)| (false, adv, var)) + .unwrap_or_else(|| { + let metadata = get_metadata(connection, &dataset) + .expect("Error communicating with SQLite") + .unwrap_or_else(|| error("Dataset not found", 1)); + let (adv, var) = hypothetical_advantage( connection, &dataset, player1, player2, - metadata.set_limit, - metadata.decay_rate, - metadata.adj_decay_rate, + metadata.decay_const, ) - .expect("Error communicating with SQLite"), - ) - }); + .expect("Error communicating with SQLite"); + (true, adv, var) + }); - let probability = 1.0 - / (1.0 - + f64::exp( - g_func((deviation1 * deviation1 + deviation2 * deviation2).sqrt()) * advantage, - )); + let probability = 1.0 / (1.0 + f64::exp(-advantage)); let (color, other_color) = ansi_num_color(advantage, 0.2, 2.0); let len1 = prefix1.as_deref().map(|s| s.len() + 1).unwrap_or(0) + name1.len(); let len2 = prefix2.as_deref().map(|s| s.len() + 1).unwrap_or(0) + name2.len(); + // Prefix + name for each player + if let Some(pre) = prefix1 { print!("\x1b[2m{}\x1b[22m ", pre); } @@ -698,26 +612,30 @@ fn player_matchup( discrim2, name2 ); + // Probability breakdown + println!( "\x1b[1m\x1b[{4}m{0:>2$}\x1b[0m - \x1b[1m\x1b[{5}m{1:<3$}\x1b[0m", - format!("{:.1}%", probability * 100.0), format!("{:.1}%", (1.0 - probability) * 100.0), + format!("{:.1}%", probability * 100.0), len1, len2, other_color, color ); - if hypothetical { - println!( - "\n\x1b[1mHypothetical Advantage: \x1b[{1}m{0:+.4}\x1b[0m", - advantage, color - ); - } else { - println!( - "\n\x1b[1mAdvantage: \x1b[{1}m{0:+.4}\x1b[0m", - advantage, color - ); + // Advantage + variance + + println!( + "\n\x1b[1m{0}Advantage: \x1b[{1}m{2:+.4}\x1b[39m\n{0}Variance: {3:.4}\x1b[0m", + if hypothetical { "Hypothetical " } else { "" }, + color, + advantage, + variance + ); + + if !hypothetical { + // Set count let (a, b) = get_matchup_set_counts(connection, &dataset, player1, player2) .expect("Error communicating with SQLite"); @@ -777,5 +695,70 @@ fn sync(connection: &Connection, auth: String, datasets: Vec, all: bool) } fn ranking_create(connection: &Connection, dataset: Option) { + use std::collections::HashMap; + let dataset = dataset.unwrap_or_else(|| String::from("default")); + + let metadata = get_metadata(connection, &dataset) + .expect("Error communicating with SQLite") + .unwrap_or_else(|| error("Dataset not found", 1)); + + let exp = read_string().parse::().unwrap(); + + let players = get_all_players(connection, &dataset).expect("Error communicating with SQLite"); + let num_players = players.len(); + let mut table = players + .into_iter() + .map(|id| (id, 1.0 / num_players as f64)) + .collect::>(); + table.shrink_to_fit(); + + let mut diff: f64 = 1.0; + let mut iter = 0; + + while diff > 1e-8 { + let mut new_table = HashMap::with_capacity(table.capacity()); + + for (&id, &last) in table.iter() { + let mut points = get_edges(connection, &dataset, id) + .expect("Error communicating with SQLite") + .into_iter() + .map(|(other, adv, _sets)| (other, exp.powf(adv))) + .collect::>(); + + points.push((id, 1.0)); + + let sum_points = points.iter().map(|(_, val)| val).sum::(); + + points.into_iter().for_each(|(other, pts)| { + let pts_ = last * pts / sum_points; + new_table + .entry(other) + .and_modify(|v| *v += pts_) + .or_insert(pts_); + }) + } + + if iter % 10 == 0 { + diff = (table + .iter() + .map(|(id, &last)| (new_table[id] - last) * (new_table[id] - last)) + .sum::() + / num_players as f64) + .sqrt(); + println!("{}", diff); + } + + table = new_table; + iter += 1; + } + + let mut list = table.into_iter().collect::>(); + list.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); + + println!(); + for (id, pts) in list.into_iter().take(20) { + let player = get_player(connection, id).unwrap(); + println!("{} - {}", player.name, pts); + } } diff --git a/src/sync.rs b/src/sync.rs index 4d2db87..9517743 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -1,4 +1,3 @@ -use std::f64::consts::PI; use std::thread::sleep; use std::time::Duration; @@ -8,86 +7,6 @@ use crate::queries::*; use itertools::Itertools; use sqlite::*; -// Glicko-2 system calculation - -pub fn g_func(dev: f64) -> f64 { - 1.0 / (1.0 + 3.0 * dev * dev / PI / PI).sqrt() -} - -fn time_adjust(periods: f64, old_dev_sq: f64, volatility: f64) -> f64 { - (old_dev_sq + periods * volatility * volatility).sqrt() -} - -fn illinois_optimize(fun: impl Fn(f64) -> f64, mut a: f64, mut b: f64) -> f64 { - let mut f_a = fun(a); - let mut f_b = fun(b); - - while (b - a).abs() > 1e-6 { - let c = a + (a - b) * f_a / (f_b - f_a); - let f_c = fun(c); - if f_c * f_b > 0.0 { - f_a = f_a / 2.0; - } else { - a = b; - f_a = f_b; - } - b = c; - f_b = f_c; - } - a -} - -fn glicko_adjust( - advantage: f64, - deviation: f64, - volatility: f64, - other_deviation: f64, - won: bool, - time: u64, - metadata: &DatasetMetadata, -) -> (f64, f64, f64) { - let period = metadata.period; - let tau = metadata.tau; - - let g_val = g_func(other_deviation); - let exp_val = 1.0 / (1.0 + f64::exp(-g_val * advantage)); - - let variance = 1.0 / (g_val * g_val * exp_val * (1.0 - exp_val)); - - let score = if won { 1.0 } else { 0.0 }; - let delta = variance * g_val * (score - exp_val); - - let delta_sq = delta * delta; - let dev_sq = deviation * deviation; - let a = (volatility * volatility).ln(); - let vol_fn = |x| { - let ex = f64::exp(x); - let subf = dev_sq + variance + ex; - ((ex * (delta_sq - dev_sq - variance - ex)) / 2.0 / subf / subf) - (x - a) / tau / tau - }; - - let initial_b = if delta_sq > dev_sq + variance { - (delta_sq - dev_sq - variance).ln() - } else { - (1..) - .map(|k| vol_fn(a - k as f64 * tau)) - .inspect(|x| { - if x.is_nan() { - panic!(); - } - }) - .find(|x| x >= &0.0) - .unwrap() - }; - let vol_new = f64::exp(illinois_optimize(vol_fn, a, initial_b) / 2.0); - - let dev_time = time_adjust(time as f64 / period, dev_sq, vol_new); - let dev_new = 1.0 / (1.0 / dev_time / dev_time + 1.0 / variance).sqrt(); - let adjust = dev_new * dev_new * g_val * (score - exp_val); - - (adjust, dev_new, vol_new) -} - // Extract set data fn get_event_sets(event: EventId, auth: &str) -> Option> { @@ -200,104 +119,67 @@ fn update_from_set( event_time: Timestamp, results: SetData, ) -> sqlite::Result<()> { - let players_data = results.teams; - // Fall back to event time if set time is not recorded - let time = results.time.unwrap_or(event_time); - add_players(connection, dataset, &players_data, time)?; + let teams = results.teams; // Non-singles matches are currently not supported - if players_data.len() != 2 || players_data[0].len() != 1 || players_data[1].len() != 1 { + if teams.len() != 2 || teams[0].len() != 1 || teams[1].len() != 1 { return Ok(()); } - let mut it = players_data.into_iter(); - let player1 = it.next().unwrap()[0].id; - let player2 = it.next().unwrap()[0].id; - drop(it); + let players = teams.into_iter().flatten().collect::>(); + add_players(connection, dataset, &players)?; - let (deviation1, volatility1, last_played1) = - get_player_rating_data(connection, dataset, player1)?; - let time1 = time.0.checked_sub(last_played1.0).unwrap_or(0); + let player1 = players[0].id; + let player2 = players[1].id; - let (deviation2, volatility2, last_played2) = - get_player_rating_data(connection, dataset, player1)?; - let time2 = time.0.checked_sub(last_played2.0).unwrap_or(0); + // Time-adjust all variances associated with each player + let time = results.time.unwrap_or(event_time); + adjust_for_time(connection, dataset, player1, metadata.var_const, time)?; + adjust_for_time(connection, dataset, player2, metadata.var_const, time)?; - let advantage = match get_advantage(connection, dataset, player1, player2) { + let (advantage, variance) = match get_network_data(connection, dataset, player1, player2) { Err(e) => Err(e)?, Ok(None) => initialize_edge( connection, dataset, player1, player2, - metadata.set_limit, - metadata.decay_rate, - metadata.adj_decay_rate, + metadata.decay_const, + time, )?, Ok(Some(adv)) => adv, }; - let (adjust1, dev_new1, vol_new1) = glicko_adjust( - -advantage, - deviation1, - volatility1, - deviation2, - results.winner == 0, - time1, - metadata, - ); - let (adjust2, dev_new2, vol_new2) = glicko_adjust( + + // println!("{}, {} - {}, {}", player1.0, player2.0, advantage, variance); + + glicko_adjust( + connection, + dataset, + &results.id, + player1, + player2, advantage, - deviation2, - volatility2, - deviation1, - results.winner == 1, - time2, - metadata, - ); + variance, + results.winner, + metadata.decay_const, + )?; - // Set minimum deviation level - let dev_new1 = f64::max(dev_new1, 0.2); - let dev_new2 = f64::max(dev_new2, 0.2); - - set_player_data( + set_player_set_counts( connection, dataset, player1, - time, - dev_new1, - vol_new1, results.winner == 0, &results.id, )?; - set_player_data( + set_player_set_counts( connection, dataset, player2, - time, - dev_new2, - vol_new2, results.winner == 1, &results.id, )?; - let (sets1, sets2) = get_matchup_set_counts(connection, dataset, player1, player2)?; - let decay_rate = if sets1 + sets2 >= metadata.set_limit { - metadata.decay_rate - } else { - metadata.adj_decay_rate - }; - - adjust_advantages( - connection, - dataset, - results.id, - player1, - player2, - results.winner, - adjust1, - adjust2, - decay_rate, - ) + Ok(()) } pub fn sync_dataset( @@ -340,45 +222,3 @@ pub fn sync_dataset( } connection.execute("COMMIT;") } - -#[cfg(test)] -mod tests { - use super::*; - use crate::database::tests::*; - - #[test] - fn glicko_single() -> sqlite::Result<()> { - let connection = mock_datasets()?; - new_dataset(&connection, "test", metadata())?; - let players = players(2).into_iter().map(|x| vec![x]).collect(); - add_players(&connection, "test", &players, Timestamp(0))?; - - update_from_set( - &connection, - "test", - &metadata(), - Timestamp(0), - SetData { - id: SetId(StringOrInt::Int(0)), - time: None, - teams: players, - winner: 0, - }, - )?; - - println!( - "{:?}", - get_advantage(&connection, "test", PlayerId(1), PlayerId(2))?.unwrap() - ); - println!( - "{:?}", - get_player_rating_data(&connection, "test", PlayerId(1)) - ); - println!( - "{:?}", - get_player_rating_data(&connection, "test", PlayerId(2)) - ); - - Ok(()) - } -} diff --git a/src/util.rs b/src/util.rs index feea160..311c5a8 100644 --- a/src/util.rs +++ b/src/util.rs @@ -8,6 +8,7 @@ use crate::queries::{PlayerData, PlayerId, Timestamp}; pub const SECS_IN_HR: u64 = 3600; pub const SECS_IN_DAY: u64 = SECS_IN_HR * 24; pub const SECS_IN_WEEK: u64 = SECS_IN_DAY * 7; +pub const SECS_IN_YEAR: u64 = SECS_IN_DAY * 365 + SECS_IN_HR * 6; pub fn error(msg: &str, code: i32) -> ! { eprintln!("\nERROR: {}", msg);