diff --git a/src/main.rs b/src/main.rs
index d51102b..3777147 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,18 @@
 use std::env::args;
 
-const KNOWN_GRAPHEMES: [&str; 10] = ["a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko"];
+// TODO: add "n" and Digraphs
+const KNOWN_GRAPHEMES: [&str; 73] = [
+    "a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko", "ga", "gi", "gu", "ge", "go", "sa",
+    "shi", "su", "se", "so", "za", "ji", "zu", "ze", "zo", "ta", "chi", "tsu", "te", "to", "da",
+    "ji", "zu", "de", "do", "na", "ni", "nu", "ne", "no", "ha", "hi", "fu", "he", "ho", "ba", "bi",
+    "bu", "be", "bo", "pa", "pi", "pu", "pe", "po", "ma", "mi", "mu", "me", "mo", "ya", "yu", "ye",
+    "yo", "ra", "ri", "ru", "re", "ro", "wa", "wi", "we", "wo",
+];
+
+// convert unicode japanese into the latin alphabet, which is necessary in order to use split_word()
+fn romanize(word: String) -> String {
+    todo!();
+}
 
 // look at the first letter, if it exists in KNOWN_GRAPHEMES, then add to graphemes
 // else look at the first and second letter, if that exists in KNOWN_GRAPHEMES, then add that to grapehemes
@@ -14,19 +26,20 @@ fn split_word(word: String) -> Vec<String> {
             Some(val) => String::from(val),
             None => break,
         };
-        dbg!(&grapheme);
 
-        let temp = grapheme.as_str();
-
-        if KNOWN_GRAPHEMES.contains(&temp) {
+        // TODO: figure out how to deal with 'n'
+        if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
             graphemes.push(grapheme);
         } else {
-            grapheme.push(word_chars.next().unwrap());
+            match word_chars.next() {
+                Some(val) => grapheme.push(val),
+                None => panic!("word contains non japanese grapheme: {grapheme}"),
+            };
 
-            let temp = grapheme.as_str();
-
-            if KNOWN_GRAPHEMES.contains(&temp) {
+            if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
                 graphemes.push(grapheme);
+            } else {
+                panic!("word contains non japanese grapheme: {grapheme}");
             }
         }
     }
@@ -48,6 +61,9 @@ fn main() {
         None => panic!("how?!"),
     };
 
+    // TODO: sanitise even further. check if input contains any illegal chars
+    // input should only contain a-z, A-Z or in the future Unicode japanese chars
+
     println!("Sanitised input: {}", input);
 
     let graphemes = split_word(input);