Common Algorithms

From Personal Wiki

The code published on this page is provided with the conditions of the MIT license.

Integer to Words

This converts an integer into English words. Solving this problem was an interesting exercise, as I ended up parsing in 3 stages which were: splitting the digit groups, inserting symbols for base 10 exponents and ampersands after groups of hundreds, and then converting the symbols into words.

I initially started my implementation in Java, then after finishing, moved onto C++. The Rust version was the most difficult, as, of time of writing it, I was still fairly new to the language.

Java

package com.bgcottrell;

import java.util.*;

public class Main {
    public static String[] E0Literals = "zero,one,two,three,four,five,six,seven,eight,nine".split(",");
    public static String[] TNLiterals = "eleven,twelve,thirteen,fourteen,fifteen,sixteen,seventeen,eighteen,nineteen".split(",");
    public static String[] E1Literals = "ten,twenty,thirty,forty,fifty,sixty,seventy,eighty,ninety".split(",");
    public static String parseIntString(String strValue)
    {
        StringBuilder result = new StringBuilder();
        Long parsedValue = 0L;
        //int r = 0;
        try {
            parsedValue = Long.parseUnsignedLong(strValue);
            //r = strValue.length();
        } catch (NumberFormatException e) {
            throw new RuntimeException(e);
        }
        // Split with commas
        int numCommas = (int)Math.ceil((double) strValue.length() /6);
        StringBuilder tokens = new StringBuilder(strValue.length() + numCommas);
        if (strValue.length() < 4) {
            for (int i = strValue.length() - 1; i >= 0; i--) {
                tokens.insert(0, strValue.charAt(i));
            }
        } else {
            for (int i = strValue.length() - 1; i >= 0; i--) {
                tokens.insert(0, strValue.charAt(i));
                if ((strValue.length() - i) % 3 == 0 && (i > 0)) {
                    tokens.insert(0, ',');
                }
            }
        }
        System.out.printf("(%s) ",tokens);

        // Parse, insert exponents, hundred's joiners
        String groups[] = tokens.toString().split(",");
        ArrayList<String> parserTokens = new ArrayList<>();
        for (int gi=0; gi < groups.length; gi++) {
            switch (groups[gi].length()) {
                case 3: {
                    Integer p[] = {
                            Integer.parseUnsignedInt(String.valueOf(groups[gi].charAt(0))),
                            Integer.parseUnsignedInt(groups[gi].substring(1)),
                    };
                    if (p[0] != 0) {
                        parserTokens.add(E0Literals[p[0]]);
                        parserTokens.add("H");
                    }
                    if (p[1] != 0 && p[1] < 10) {
                        parserTokens.add("&");
                        parserTokens.add(E0Literals[p[1]]);
                    } else if (p[1] != 0 && p[1] < 20) {
                        parserTokens.add("&");
                        parserTokens.add(TNLiterals[p[1] - 11]);
                    } else {
                        if (groups[gi].charAt(1) != '0') {
                            parserTokens.add("&");
                            parserTokens.add(
                                    E1Literals[Integer.parseUnsignedInt(String.valueOf(groups[gi].charAt(1))) - 1]
                            );
                        }
                        if (groups[gi].charAt(2) != '0') {
                            parserTokens.add(
                                E0Literals[Integer.parseUnsignedInt(String.valueOf(groups[gi].charAt(2)))]
                            );
                        }
                    }
                    break;
                }
                case 2:
                {
                    Integer p = Integer.parseUnsignedInt(groups[gi]);
                    if (p < 10) {
                        parserTokens.add(E0Literals[p]);
                    } else if (p < 20) {
                        parserTokens.add(TNLiterals[p - 11]);
                    } else {
                        parserTokens.add(
                                E1Literals[Integer.parseUnsignedInt(String.valueOf(groups[gi].charAt(0))) - 1]
                        );
                        if (groups[gi].charAt(1) != '0') {
                            parserTokens.add(
                                    E0Literals[Integer.parseUnsignedInt(String.valueOf(groups[gi].charAt(1)))]
                            );
                        }
                    }
                    break;
                }
                case 1: {
                    Integer p = Integer.parseUnsignedInt(groups[gi]);
                    parserTokens.add(E0Literals[p]);
                }
            }
            if (gi == groups.length - 2 && groups[gi] != "000") {
                parserTokens.add("T");
            } else if (gi == groups.length - 3) {
                parserTokens.add("M");
            } else if (gi == groups.length - 4) {
                parserTokens.add("B");
            }
        }
        //System.out.printf("Parsed: %s", parserTokens);
        for (int i =0; i < parserTokens.size() - 1;i++) {
            switch (parserTokens.get(i)) {
                case "H":
                    result.append("hundred");
                    break;
                case "T":
                    result.append("thousand");
                    break;
                case "M":
                    result.append("million");
                    break;
                case "B":
                    result.append("billion");
                    break;
                case "&":
                    result.append("and");
                    break;
                default:
                    result.append(parserTokens.get(i));
            }
            result.append(" ");
        }
        switch (parserTokens.getLast()) {
            case "H":
                result.append("hundred");
                break;
            case "T":
                result.append("thousand");
                break;
            case "M":
                result.append("million");
                break;
            case "B":
                result.append("billion");
            case "&":
                result.append("and");
            default:
                result.append(parserTokens.getLast());
        }
        return result.toString();
    }
}

C++17

#include <cmath>
#include <iostream>
#include <iomanip>
#include <vector>
#include <ranges>
#include <string_view>
#include <algorithm>
#include <format>
static string IntToWords(const string& input)
{
    static array E0Literals = {"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"};
    static array TNLiterals = {"eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"};
    static array E1Literals = {"ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
    string result;
    try {
        ulong parsedValue = stoul(input);
    } catch (const exception& ex) {
        return "invalid";
    }
    // Split with commas, make it easier to parse
    const auto numCommas = int(ceil(input.length()/6));
    string tokens;
    tokens.reserve(input.length()+numCommas);
    if (input.length() < 4) {
        for (int i=input.length()-1;i>=0;i--) {
            tokens.insert(tokens.begin(), input.at(i));
        }
    } else {
        for (int i=input.length()-1;i>=0;i--) {
            tokens.insert(tokens.begin(),input.at(i));
            if ((input.length() -i) % 3 == 0) {
                tokens.insert(tokens.begin(),',');
            }
        }
    }
    // Parse, insert exponents, joiners
    vector<string> groups = split(tokens, ",");
    vector<string> parserTokens;
    parserTokens.reserve(input.length()*2);
    for (size_t gi=0; gi != groups.size(); gi++) {
        switch (groups[gi].length()) {
        case 3:
        {
            vector<int> p = {
                stoi(groups[gi].substr(0,1)),
                stoi(groups[gi].substr(1))
            };
            if (p[0] != 0) {
                parserTokens.push_back(E0Literals[p[0]]);
                parserTokens.push_back("H");
            }
            if (p[1] != 0 && p[1] < 10) {
                parserTokens.push_back("&");
                parserTokens.push_back(E0Literals[p[1]]);
            } else if (p[1] != 0 && p[1] < 20) {
                parserTokens.push_back("&");
                parserTokens.push_back(TNLiterals[p[1]-11]);
            } else {
                if (groups[gi].at(1) != '0') {
                    parserTokens.push_back("&");
                    parserTokens.push_back(
                        E1Literals[stoi(groups[gi].substr(1,1))-1]
                    );
                }
                if (groups[gi].at(2) != '0') {
                    parserTokens.push_back(
                        E0Literals[stoi(groups[gi].substr(2,1))]
                    );
                }
            }
            break;
        }
        case 2:
        {
            int p = stoi(groups[gi]);
            if (p < 10) {
                parserTokens.push_back(E0Literals[p]);
            } else if (p < 20) {
                parserTokens.push_back(TNLiterals[p-11]);
            } else {
                parserTokens.push_back(
                    E1Literals[stoi(groups[gi].substr(0,1))-1]
                );
                if (groups[gi].at(1) != '0') {
                    parserTokens.push_back(
                        E0Literals[stoi(groups[gi].substr(1,1))]
                    );
                }
            }
            break;
        }
        case 1:
        {
            int p = stoi(groups[gi]);
            parserTokens.push_back(E0Literals[p]);
            break;
        }
        } /* switch */
        if (gi == groups.size() - 2 && groups[gi] != "000") {
            parserTokens.push_back("T");
        } else if (gi == groups.size() - 3) {
            parserTokens.push_back("M");
        } else if (gi == groups.size() - 4) {
            parserTokens.push_back("B");
        } else if (gi == groups.size() - 5) {
            parserTokens.push_back("Tr");
        }
        } /* for */
        for (int i=0; i < parserTokens.size() - 1; i++) {
            string s = parserTokens.at(i);
            if (s == "H") {
                result.append("hundred");
            } else if (s == "T") {
                result.append("thousand");
            } else if (s == "M") {
                result.append("million");
            } else if (s == "B") {
                result.append("billion");
            } else if (s == "Tr") {
                result.append("trillion");
            } else if (s == "&") {
                result.append("and");
            } else {
                result.append(s);
            }
            result.append(" ");
        }
        auto s = parserTokens.back();
        if (s == "H") {
            result.append("hundred");
        } else if (s == "T") {
            result.append("thousand");
        } else if (s == "M") {
            result.append("million");
        } else if (s == "B") {
            result.append("billion");
        } else if (s == "Tr") {
            result.append("trillion");
        } else if (s == "&") {
            result.append("and");
        } else {
            result.append(s);
        }
    return result;
}

Rust

use std::iter::Iterator;
use std::ops::Deref;
use std::str::FromStr;
struct IntToWords;
impl IntToWords
{
    // Using Option, because I cannot figure out how to return generic errors
    pub fn parse_value(&self, input: &String) -> Option<String>
    {
        let E0Literals: Vec<&'static str> = "zero,one,two,three,four,five,six,seven,eight,nine".split(',').collect();
        let TNLiterals: Vec<&'static str> = "eleven,twelve,thirteen,fourteen,fifteen,sixteen,seventeen,eighteen,nineteen".split(',').collect();
        let E1Literals: Vec<&'static str> = "ten,twenty,thirty,forty,fifty,sixty,seventy,eighty,ninety".split(',').collect();
        let mut result: String = String::from("");
        let parsed_value = u64::from_str(input);
        if parsed_value.is_err() {
            return Some(String::from("invalid"));
        }
        let num_commas = (input.len() as f32/6.0).ceil();
        // Split into digit groups
        let mut tokens: String = "".to_string();
        tokens.reserve(input.len()+num_commas as usize);
        if input.len() < 4 {
            let mut i = input.len()-1;
            loop {
                tokens.insert(0, input.chars().nth(i).unwrap());
                if i > 0 { i -= 1; } else { break; }
            }
        } else {
            let mut i = input.len()-1;
            loop {
                tokens.insert(0,input.chars().nth(i).unwrap());
                if (input.len()-i) % 3 == 0 {
                    tokens.insert(0,',');
                }
                if i > 0 { i -= 1; } else { break; }
            }
        }
        // Parse the digit groups
        let groups : Vec<&str> = tokens.split(',').collect();
        let mut parser_tokens:Vec<&str> = Vec::new();
        for gi in 0..groups.len() {
            match groups[gi].len() {
                3 => {
                    let p = [
                        usize::from_str(&groups[gi].chars().as_str()[0..1]).unwrap(),
                        usize::from_str(&groups[gi].chars().as_str()[1..]).unwrap()
                    ];
                    let p1 = (p[1] as f32/10_f32).floor() as u32;
                    let p2 = p[1] as u32 - (p1 as u32 * 10);
                    if p[0] != 0 {
                        parser_tokens.push(E0Literals[p[0]]);
                        parser_tokens.push("H");
                    }
                    if p[1] != 0 && p[1] < 10 {
                        parser_tokens.push("&");
                        parser_tokens.push(E0Literals[p[1]]);
                    } else if p[1] != 0 && p[1] < 20 {
                        parser_tokens.push("&");
                        parser_tokens.push(TNLiterals[p[1]-11]);
                    } else {
                        if groups[gi].chars().nth(1).unwrap() != '0' {
                            parser_tokens.push("&");
                            parser_tokens.push(
                                E1Literals[p1 as usize - 1]
                            );
                        }
                        if groups[gi].chars().nth(2).unwrap() != '0' {
                            parser_tokens.push(
                                E0Literals[p2 as usize]
                            );
                        }
                    }
                }
                2 => {
                    let p = usize::from_str(&groups[gi]).unwrap();
                    let p1 = (p as f32/10_f32).floor() as u32;
                    let p2 = p as u32 - (p1 * 10);
                    if p < 10 {
                        parser_tokens.push(E0Literals[p]);
                    } else if p < 20 {
                        parser_tokens.push(TNLiterals[p-11]);
                    } else {
                        parser_tokens.push(
                            E1Literals[p1 as usize-1]
                        );
                        if groups[gi].chars().nth(1).unwrap() != '0' {
                            parser_tokens.push(
                                E0Literals[p2 as usize]
                            );
                        }
                    }
                }
                1 => {
                    let p = usize::from_str(groups[gi]).unwrap();
                    parser_tokens.push(E0Literals[p]);
                }
                _ => {}
            } /* match */
            if groups.len() >= 2 && gi == groups.len() - 2 && groups[gi] != "000" {
                parser_tokens.push("T");
            } else if groups.len() >= 3 && gi == groups.len() - 3 {
                parser_tokens.push("M");
            } else if groups.len() >= 4 && gi == groups.len() - 4 {
                parser_tokens.push("B");
            } else if groups.len() >= 5 && gi == groups.len() - 5 {
                parser_tokens.push("Tr");
            }
        }
        // Convert tokens to words
        for i in 0..parser_tokens.len() - 1 {
            let s = parser_tokens[i];
            result.push_str(match s {
                "H" => "hundred",
                "T" => "thousand",
                "M" => "million",
                "B" => "billion",
                "Tr" => "trillion",
                "&" => "and",
                _ => s
            });
            result.push_str(" ");
        }
        let s = parser_tokens.last().unwrap().deref();
        result.push_str(match s {
            "H" => "hundred",
            "T" => "thousand",
            "M" => "million",
            "B" => "billion",
            "Tr" => "trillion",
            "&" => "and",
            _ => s
        });
        Some(result)
    }
}