n should now be recognised
This commit is contained in:
parent
b38d4f2430
commit
a22acaddc3
129
src/main.rs
129
src/main.rs
@ -1,62 +1,20 @@
|
||||
use std::env::args;
|
||||
|
||||
// TODO: add "n" and Digraphs
|
||||
const KNOWN_GRAPHEMES: [&str; 73] = [
|
||||
"a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko", "ga", "gi", "gu", "ge", "go", "sa",
|
||||
// TODO: add Digraphs
|
||||
// Romanized syllables that split_word() accepts as one grapheme.
// Lookups are done with a linear `contains`, so entry order does not matter.
// NOTE: "ji" and "zu" each appear twice (once after "za", once after "da");
// the duplicates are harmless for membership tests but count toward the length 74.
const KNOWN_GRAPHEMES: [&str; 74] = [
|
||||
"n", "a", "i", "u", "e", "o", "ka", "ki", "ku", "ke", "ko", "ga", "gi", "gu", "ge", "go", "sa",
|
||||
"shi", "su", "se", "so", "za", "ji", "zu", "ze", "zo", "ta", "chi", "tsu", "te", "to", "da",
|
||||
"ji", "zu", "de", "do", "na", "ni", "nu", "ne", "no", "ha", "hi", "fu", "he", "ho", "ba", "bi",
|
||||
"bu", "be", "bo", "pa", "pi", "pu", "pe", "po", "ma", "mi", "mu", "me", "mo", "ya", "yu", "ye",
|
||||
"yo", "ra", "ri", "ru", "re", "ro", "wa", "wi", "we", "wo",
|
||||
];
|
||||
|
||||
// convert unicode japanese into the latin alphabet, which is necessary in order to use split_word()
|
||||
// Stub: takes ownership of `word` but does nothing with it yet; calling this
// function always panics through todo!().
fn romanize(word: String) -> String {
|
||||
todo!();
|
||||
}
|
||||
|
||||
// look at the first letter, if it exists in KNOWN_GRAPHEMES, then add to graphemes
|
||||
// else look at the first and second letter, if that exists in KNOWN_GRAPHEMES, then add that to graphemes
|
||||
// Splits a romanized `word` into graphemes, returned in input order.
//
// Each step consumes one character and, if that alone is not a known
// grapheme, exactly one more. Panics with "word contains non japanese
// grapheme: ..." when neither the one- nor the two-character candidate is in
// KNOWN_GRAPHEMES, or when the input ends after an unknown single character.
// Because at most two characters are ever combined, the three-letter entries
// of KNOWN_GRAPHEMES ("shi", "chi", "tsu") cannot be matched by this version.
fn split_word(word: String) -> Vec<String> {
|
||||
let mut graphemes: Vec<String> = vec![];
|
||||
let mut word_chars = word.chars();
|
||||
let mut grapheme: String;
|
||||
|
||||
loop {
|
||||
grapheme = match word_chars.next() {
|
||||
Some(val) => String::from(val),
|
||||
None => break,
|
||||
};
|
||||
|
||||
// TODO: figure out how to deal with 'n'
|
||||
if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
|
||||
graphemes.push(grapheme);
|
||||
} else {
|
||||
match word_chars.next() {
|
||||
Some(val) => grapheme.push(val),
|
||||
None => panic!("word contains non japanese grapheme: {grapheme}"),
|
||||
};
|
||||
|
||||
if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
|
||||
graphemes.push(grapheme);
|
||||
} else {
|
||||
panic!("word contains non japanese grapheme: {grapheme}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return graphemes;
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// dbg!(KNOWN_GRAPHEMES);
|
||||
// TODO: loop over args in order to work with sentences
|
||||
|
||||
// dbg!(&args());
|
||||
|
||||
if args().len() != 2 {
|
||||
if std::env::args().len() != 2 {
|
||||
panic!("fuck");
|
||||
}
|
||||
|
||||
let input = match args().last() {
|
||||
let mut input = match std::env::args().last() {
|
||||
Some(val) => val.trim().to_lowercase(),
|
||||
None => panic!("how?!"),
|
||||
};
|
||||
@ -64,9 +22,82 @@ fn main() {
|
||||
// TODO: sanitise even further. check if input contains any illegal chars
|
||||
// input should only contain a-z, A-Z or in the future Unicode japanese chars
|
||||
|
||||
if is_invalid(&input) {
|
||||
panic!("input {} is invalid", input);
|
||||
}
|
||||
|
||||
if !input.is_ascii() {
|
||||
input = romanize(&input);
|
||||
}
|
||||
|
||||
println!("Sanitised input: {}", input);
|
||||
|
||||
let graphemes = split_word(input);
|
||||
let graphemes = split_word(&input);
|
||||
|
||||
dbg!(graphemes);
|
||||
}
|
||||
|
||||
// Returns true when `word` contains a character that can never be part of
// valid input.
//
// Every ASCII character must be a letter (a-z, A-Z); digits, punctuation and
// whitespace make the word invalid. Non-ASCII characters are deliberately let
// through, because main() passes non-ASCII input on to romanize() afterwards.
// An empty word contains no offending character and is reported as valid.
fn is_invalid(word: &str) -> bool {
    word.chars()
        .any(|c| c.is_ascii() && !c.is_ascii_alphabetic())
}
|
||||
|
||||
// convert unicode japanese into the latin alphabet
|
||||
// return romanized version of input
|
||||
// Stub: prints `word` to stderr via dbg! and then panics through todo!();
// the conversion itself is not implemented yet, so this never returns.
fn romanize(word: &String) -> String {
|
||||
dbg!(&word);
|
||||
todo!();
|
||||
}
|
||||
|
||||
// This is absolute cancer
|
||||
fn split_word(word: &String) -> Vec<String> {
|
||||
let mut graphemes: Vec<String> = vec![];
|
||||
let mut word_chars = word.chars();
|
||||
|
||||
loop {
|
||||
let mut grapheme: String = String::new();
|
||||
|
||||
match word_chars.next() {
|
||||
Some(val) => grapheme.push(val),
|
||||
None => break,
|
||||
};
|
||||
|
||||
if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) && grapheme != "n" {
|
||||
graphemes.push(grapheme);
|
||||
continue;
|
||||
}
|
||||
|
||||
match word_chars.next() {
|
||||
Some(val) => grapheme.push(val),
|
||||
_ => (),
|
||||
};
|
||||
|
||||
if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
|
||||
graphemes.push(grapheme);
|
||||
continue;
|
||||
}
|
||||
|
||||
if grapheme.starts_with('n') {
|
||||
if grapheme.trim_end_matches("aiueo") == "n" {
|
||||
graphemes.push(grapheme);
|
||||
continue;
|
||||
} else {
|
||||
graphemes.push(String::from('n'));
|
||||
grapheme.remove(0);
|
||||
}
|
||||
}
|
||||
|
||||
match word_chars.next() {
|
||||
Some(val) => grapheme.push(val),
|
||||
_ => (),
|
||||
};
|
||||
|
||||
if KNOWN_GRAPHEMES.contains(&grapheme.as_str()) {
|
||||
graphemes.push(grapheme);
|
||||
} else {
|
||||
panic!("word contains non japanese grapheme: {grapheme}");
|
||||
}
|
||||
}
|
||||
|
||||
return graphemes;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user