pick -n supports closest name match (#138)

qwed81 · web-flow · commit 4b76c46bfe02 · 2023-12-19T11:12:28.000+08:00
diff --git a/src/cmds/pick.rs b/src/cmds/pick.rs
@@ -1,5 +1,6 @@
 //! Pick command
 use super::Command;
+use crate::cache::models::Problem;
 use crate::err::Error;
 use async_trait::async_trait;
 use clap::{Arg, ArgAction, ArgMatches, Command as ClapCommand};
@@ -136,12 +137,12 @@ impl Command for PickCommand {
         };
 
         let fid = match m.contains_id("name") {
-            //check for name specified
+            // check for name specified, or closest name
             true => {
                 match m.get_one::<String>("name").map(|name| name) {
-                    Some(quesname) => match cache.get_problem_id_from_name(quesname) {
-                        Ok(p) => p,
-                        Err(_) => 1,
+                    Some(quesname) => match closest_named_problem(&problems, quesname) {
+                        Some(p) => p,
+                        None => 1,
                     },
                     None => {
                         // Pick random without specify id
@@ -177,3 +178,69 @@ impl Command for PickCommand {
         Ok(())
     }
 }
+
+/// Returns the `fid` of the problem whose name is closest to `lookup_name`.
+///
+/// An exact name match wins outright. Otherwise each problem is scored as
+/// `lcs * (max_name_size - name.len())`, where `lcs` is its longest common
+/// subsequence with `lookup_name`; the second factor favours shorter names.
+/// The first problem with the highest score is returned, or `None` if
+/// `problems` is empty.
+// NOTE(review): the longest name always scores 0 — confirm this is intended.
+fn closest_named_problem(problems: &Vec<Problem>, lookup_name: &str) -> Option<i32> {
+    let max_name_size: usize = problems.iter().map(|p| p.name.len()).max()?;
+    // One table, sized for the longest name, is shared by every lcs call.
+    let mut table: Vec<usize> = vec![0; (max_name_size + 1) * (lookup_name.len() + 1)];
+
+    // Fallback when nothing scores above 0; never `None` after `max()?`.
+    let mut current_problem = problems.first()?;
+    let mut max_score = 0;
+    for problem in problems {
+        // Scoring must never hide an exact match, so return it directly.
+        if problem.name == lookup_name {
+            return Some(problem.fid);
+        }
+        let this_lcs = longest_common_subsequence(&mut table, &problem.name, lookup_name);
+        let this_score = this_lcs * (max_name_size - problem.name.len());
+        if this_score > max_score {
+            max_score = this_score;
+            current_problem = problem;
+        }
+    }
+
+    Some(current_problem.fid)
+}
+
+/// Case-insensitive longest common subsequence length of `text1` and `text2`.
+///
+/// Bottom-up DP in O(n*m) time over the caller's scratch `table`, which is
+/// overwritten on every call. Dimensions are counted in chars, not bytes, so
+/// multi-byte text fills the table down to cell 0 instead of leaving it stale.
+///
+/// # Panics
+/// If `table.len()` is below `(text1 chars + 1) * (text2 chars + 1)`.
+fn longest_common_subsequence(table: &mut Vec<usize>, text1: &str, text2: &str) -> usize {
+    let height: usize = text1.chars().count() + 1;
+    let width: usize = text2.chars().count() + 1;
+    assert!(table.len() >= height * width);
+    // Base cases: the last column and last row (empty suffixes) are 0.
+    for i in 0..height {
+        table[i * width + (width - 1)] = 0;
+    }
+    table[(height - 1) * width..height * width].fill(0);
+    // Walk both texts backwards; cell (i, j) is the lcs of their suffixes.
+    let mut i: usize = height - 1;
+    for c0 in text1.chars().rev() {
+        i -= 1;
+        let mut j: usize = width - 1;
+        for c1 in text2.chars().rev() {
+            j -= 1;
+            table[i * width + j] = if c0.to_lowercase().next() == c1.to_lowercase().next() {
+                1 + table[(i + 1) * width + j + 1]
+            } else {
+                table[(i + 1) * width + j].max(table[i * width + j + 1])
+            };
+        }
+    }
+    table[0]
+}