/// <summary>
/// Converts free text to something as close to sentence case as possible.
/// The first letter after a line terminator (<c>. : ! ?</c>, CR or LF) is
/// capitalised, a stand-alone "i" is treated as the personal pronoun "I"
/// (including contractions such as I'll, I'd, I'm), and runs of adjacent
/// upper-case letters are lowered. An isolated upper-case letter whose
/// neighbours are not upper case is kept, so mixed-case input retains
/// capitals such as proper nouns.
/// </summary>
/// <param name="input">
/// Free text for case correction. May be null or empty.
/// </param>
/// <returns>
/// The text converted to sentence case; an empty string when
/// <paramref name="input"/> is null or empty.
/// </returns>
public string SentenceCase(string input)
{
    // Break out if there is no work to do
    if (string.IsNullOrEmpty(input))
    {
        return "";
    }

    // True while the next letter encountered should be capitalised
    bool start = true;
    // True while inside a personal pronoun contraction (e.g. I'll, I'd, I'm)
    bool personalPronoun = false;
    // Output buffer; every input character maps to exactly one output character
    char[] output = new char[input.Length];
    // Original (unconverted) character preceding the current one
    char cPrev = '\0';

    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];
        // Past the end of the input, behave as if a full stop follows
        char cNext = i < input.Length - 1 ? input[i + 1] : '.';

        if (personalPronoun)
        {
            // The apostrophe and the letters of a contraction are always lower case
            output[i] = char.ToLower(c);
            start = false;
            // The contraction ends as soon as the next character is not a letter
            personalPronoun = char.IsLetter(cNext);
        }
        else if (!char.IsLetter(c))
        {
            // Non-letter characters are copied through unchanged
            output[i] = c;

            if (IsLineTerminator(c))
            {
                start = true;
            }
            else if (char.IsNumber(c))
            {
                // A number counts as the start of the sentence, so the
                // letter that follows it is not capitalised
                start = false;
            }
        }
        else if ((c == 'I' || c == 'i') && !char.IsLetter(cPrev) && !char.IsLetter(cNext))
        {
            // "I"/"i" with no letter on either side is the personal pronoun.
            // Emit the literal 'I' rather than char.ToUpper(c): ToUpper is
            // culture-sensitive and maps 'i' to U+0130 under Turkish/Azeri cultures.
            output[i] = 'I';
            start = false;

            // A following apostrophe starts a personal pronoun contraction
            if (cNext == '\'')
            {
                personalPronoun = true;
            }
        }
        else if (start)
        {
            // First letter after a line terminator is capitalised
            output[i] = char.ToUpper(c);
            start = false;
        }
        else if (!char.IsLower(c) && (char.IsUpper(cPrev) || char.IsUpper(cNext)))
        {
            // A non-lower-case letter with an upper-case neighbour is assumed
            // to be part of an all-caps run and is lowered
            output[i] = char.ToLower(c);
        }
        else
        {
            // Lower-case letters and isolated capitals are kept as they are
            output[i] = c;
        }

        cPrev = c;
    }

    return new string(output);
}

/// <summary>
/// Returns true when <paramref name="c"/> is one of the characters after
/// which the next letter should be capitalised.
/// </summary>
private static bool IsLineTerminator(char c)
{
    switch (c)
    {
        case '.':
        case ':':
        case '\n':
        case '\r':
        case '!':
        case '?':
            return true;
        default:
            return false;
    }
}
|