n should now be recognised

2024-12-19 12:21:46 +01:00 · 2024-12-19 12:21:46 +01:00 · a22acaddc3
commit a22acaddc3
parent b38d4f2430
1 changed files with 80 additions and 49 deletions
--- a/src/main.rs
+++ b/src/main.rs
@ -1,62 +1,20 @@
-use std::env::args;
-
-// TODO: add "n" and Digraphs
-const KNOWN_GRAPHEMES: [&str; 73] = [
-    "a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko", "ga", "gi", "gu", "ge", "go", "sa",
+// TODO: add Digraphs
+const KNOWN_GRAPHEMES: [&str; 74] = [
+    "n", "a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko", "ga", "gi", "gu", "ge", "go", "sa",
    "shi", "su", "se", "so", "za", "ji", "zu", "ze", "zo", "ta", "chi", "tsu", "te", "to", "da",
    "ji", "zu", "de", "do", "na", "ni", "nu", "ne", "no", "ha", "hi", "fu", "he", "ho", "ba", "bi",
    "bu", "be", "bo", "pa", "pi", "pu", "pe", "po", "ma", "mi", "mu", "me", "mo", "ya", "yu", "ye",
    "yo", "ra", "ri", "ru", "re", "ro", "wa", "wi", "we", "wo",
 ];

-// convert unicode japanese into the latin alphabet, which is necessary in order to use split_word()
-fn romanize(word: String) -> String {
-    todo!();
-}
-
-// look at the first letter, if it exists in KNOWN_GRAPHEMES, then add to graphemes
-// else look at the first and second letter, if that exists in KNOWN_GRAPHEMES, then add that to grapehemes
-fn split_word(word: String) -> Vec<String> {
-    let mut graphemes: Vec<String> = vec![];
-    let mut word_chars = word.chars();
-    let mut grapheme: String;
-
-    loop {
-        grapheme = match word_chars.next() {
-            Some(val) => String::from(val),
-            None => break,
-        };
-
-        // TODO: figure out how to deal with 'n'
-        if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
-            graphemes.push(grapheme);
-        } else {
-            match word_chars.next() {
-                Some(val) => grapheme.push(val),
-                None => panic!("word contains non japanese grapheme: {grapheme}"),
-            };
-
-            if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
-                graphemes.push(grapheme);
-            } else {
-                panic!("word contains non japanese grapheme: {grapheme}");
-            }
-        }
-    }
-
-    return graphemes;
-}
-
 fn main() {
-    // dbg!(KNOWN_GRAPHEMES);
+    // TODO: loop over args in order to work with sentences

-    // dbg!(&args());
-
-    if args().len() != 2 {
+    if std::env::args().len() != 2 {
        panic!("fuck");
    }

-    let input = match args().last() {
+    let mut input = match std::env::args().last() {
        Some(val) => val.trim().to_lowercase(),
        None => panic!("how?!"),
    };
@ -64,9 +22,82 @@ fn main() {
    // TODO: sanitise even further. check if input contains any illegal chars
    // input should only contain a-z, A-Z or in the future Unicode japanese chars

+    if is_invalid(&input) {
+        panic!("input {} is invalid", input);
+    }
+
+    if !input.is_ascii() {
+        input = romanize(&input);
+    }
+
    println!("Sanitised input: {}", input);

-    let graphemes = split_word(input);
+    let graphemes = split_word(&input);

    dbg!(graphemes);
 }
+
+fn is_invalid(word: &String) -> bool { // placeholder validator: no checks are implemented yet
+    dbg!(&word); // debug-print the word being checked
+    return false; // every input is currently reported as valid
+}
+
+// Convert Japanese text written in non-ASCII Unicode into the Latin alphabet and
+// return the romanized string. Not implemented yet: every call panics via todo!().
+fn romanize(word: &String) -> String {
+    dbg!(&word); // debug-print the input before bailing out
+    todo!();
+}
+
+// Split a romanized word into the graphemes listed in KNOWN_GRAPHEMES; panics on anything it cannot match. Hard to follow -- candidate for a rewrite.
+fn split_word(word: &String) -> Vec<String> {
+    let mut graphemes: Vec<String> = vec![];
+    let mut word_chars = word.chars();
+
+    loop { // one pass usually yields one grapheme; two when a lone "n" is split off
+        let mut grapheme: String = String::new(); // candidate, built up one character at a time
+
+        match word_chars.next() {
+            Some(val) => grapheme.push(val),
+            None => break, // input exhausted: done
+        };
+
+        if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) && grapheme != "n" { // single-letter grapheme; "n" is deferred because it may start "na", "ni", ...
+            graphemes.push(grapheme);
+            continue;
+        }
+
+        match word_chars.next() { // extend the candidate to two characters
+            Some(val) => grapheme.push(val),
+            _ => (), // no more input: keep the candidate as it is
+        };
+
+        if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) { // two-letter grapheme, or a lone "n" at the very end of the word
+            graphemes.push(grapheme);
+            continue;
+        }
+
+        if grapheme.starts_with('n') { // "n" plus a character that does not form a known n-syllable
+            if grapheme.trim_end_matches("aiueo") == "n" { // NOTE(review): strips the literal suffix "aiueo", not single vowels; branch looks unreachable here -- confirm intent
+                graphemes.push(grapheme);
+                continue;
+            } else {
+                graphemes.push(String::from('n')); // emit the standalone "n"
+                grapheme.remove(0); // drop that "n"; the remaining character starts the next grapheme
+            }
+        }
+
+        match word_chars.next() { // read one more character (third, or second after an "n" split)
+            Some(val) => grapheme.push(val),
+            _ => (), // no more input: the check below decides
+        };
+
+        if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) { // NOTE(review): after an "n" split only two characters are available, so "shi"/"chi"/"tsu" directly after "n" appear to panic -- confirm
+            graphemes.push(grapheme);
+        } else {
+            panic!("word contains non japanese grapheme: {grapheme}");
+        }
+    }
+
+    return graphemes;
+}