Random Surname Generator
Answering the challenge raised by Jon Galloway on his must-read blog, here’s my solution for generating random, fake surnames. The code has plenty of comments. Feel free, of course, to post questions, criticism or, even better, a cool refactor! =)
{
/// <summary>Every known digram; filled by LoadDigrams()</summary>
private static readonly ArrayList AllDigrams = new ArrayList();

/// <summary>Every known single letter; filled by LoadLetters()</summary>
private static readonly ArrayList AllLetters = new ArrayList();

/// <summary>Shared random number generator used by the surname generation methods</summary>
private static readonly Random rSeed = new Random();

/// <summary>
/// The hand-made "skeletons" a surname can be built from: one skeleton per
/// row, one building block per cell.
///   "A"  = a single vowel        "B"  = a single consonant
///   "AA", "AB", "BA", "BB" = a digram made of those two kinds of letter
///   ""   = unused cell (shorter skeleton)
/// Add, remove or edit rows to fine-tune the kind of surnames produced.
/// </summary>
private static readonly string[,] SurnameRandomTypes =
{
    { "A",  "BA", "B",  "A",  "BA", ""   },
    { "B",  "A",  "BA", "",   "",   ""   },
    { "A",  "BA", "A",  "B",  "A",  "BB" },
    { "A",  "B",  "B",  "A",  "BA", ""   },
    { "A",  "BB", "AB", "A",  "",   ""   },
    { "BA", "B",  "B",  "A",  "B",  "A"  },
    { "B",  "AB", "A",  "BB", "A",  "AB" },
    { "B",  "AB", "B",  "A",  "BB", "AB" },
    { "A",  "BB", "A",  "B",  "AB", ""   },
    { "AB", "AA", "B",  "A",  "B",  "A"  },
};
/// <summary>
/// Console entry point: loads the letter and digram tables, then prints
/// batches of random surnames until the user types X (or standard input
/// is closed).
/// </summary>
static void Main()
{
    //Both tables must be filled before the first surname is generated,
    //because getDigram() and getLetter() draw from them
    LoadDigrams();
    LoadLetters();

    bool exit = false;
    while (!exit)
    {
        //NOTE(review): the inclusive bound yields 21 names per batch -
        //confirm whether 20 was intended
        for (int i = 0; i <= 20; i++)
        {
            Console.WriteLine("Random Surname: {0}",
                GenerateRandomSurname());
        }

        Console.WriteLine("Press ENTER to generate another batch, X to quit");
        string sInput = Console.ReadLine();

        //ReadLine() returns null once the input stream has ended (for
        //example with redirected input); treat that as a request to quit
        //instead of dereferencing null
        if (sInput == null ||
            string.Equals(sInput, "x", StringComparison.OrdinalIgnoreCase))
        {
            exit = true;
        }
    }

    //Waits for one more ENTER before returning (presumably so the console
    //window stays open)
    Console.ReadLine();
}
/// <summary>
/// Builds one random surname. A random row of SurnameRandomTypes is chosen
/// and, for each of its cells, a random letter ("A"/"B") or digram
/// ("AA"/"AB"/"BA"/"BB") of the requested kind is appended; empty cells
/// add nothing.
/// </summary>
/// <returns>The surname with its first letter converted to upper case</returns>
static string GenerateRandomSurname()
{
    //Random.Next(max) never returns max itself, so asking for the row
    //count makes every row selectable (Next(0, 9) could never reach the
    //last of the ten rows)
    int rndIndex = rSeed.Next(SurnameRandomTypes.GetLength(0));
    int cellCount = SurnameRandomTypes.GetLength(1);

    string sRet = "";
    for (int i = 0; i < cellCount; i++)
    {
        switch (SurnameRandomTypes[rndIndex, i])
        {
            case "AA":
                sRet += getDigram(DigramType.TwoVowels);
                break;
            case "AB":
                sRet += getDigram(DigramType.VowelAndConsonant);
                break;
            case "BA":
                sRet += getDigram(DigramType.ConsonantAndVowel);
                break;
            case "BB":
                sRet += getDigram(DigramType.TwoConsonants);
                break;
            case "A":
                sRet += getLetter(LetterType.Vowel);
                break;
            case "B":
                sRet += getLetter(LetterType.Consonant);
                break;
            default:
                //Empty cell: this skeleton is shorter than the table width
                break;
        }
    }

    //Nothing to capitalise if the chosen row produced no text
    if (sRet.Length == 0)
        return sRet;

    //Invariant casing keeps the result independent of the current culture
    return sRet.Substring(0, 1).ToUpperInvariant() + sRet.Substring(1);
}
#region Letters and Digrams
#region Structs and Enums
/// <summary>
/// Tells what kind of letters a digram is made of,
/// first letter first
/// </summary>
private enum DigramType
{
//Vowel followed by vowel. Being the first member, this is also
//the Type of a default-constructed Digram
TwoVowels,
//Vowel followed by consonant
VowelAndConsonant,
//Consonant followed by vowel
ConsonantAndVowel,
//Consonant followed by consonant
TwoConsonants
}
/// <summary>
/// Tells whether a letter is a vowel
/// or a consonant
/// </summary>
private enum LetterType
{
//Being the first member, this is also the Type of a
//default-constructed Letter
Vowel,
Consonant
}
/// <summary>
/// A digram. Together with the actual text ("th", for example)
/// it stores its type and the range of numbers assigned to it
/// by LoadDigrams(); the size of that range follows the digram's
/// frequency in the English language
/// </summary>
private struct Digram
{
public int iniRange; //First number of its range (inclusive)
public int endRange; //Last number of its range (inclusive)
public string Value; //The two letters themselves
public DigramType Type; //Which kinds of letters it is made of
}
/// <summary>
/// A letter. Together with the actual letter ("a", for example)
/// it stores its type and the range of numbers assigned to it
/// by LoadLetters(); the size of that range follows the letter's
/// frequency in the English language
/// </summary>
private struct Letter
{
public int iniRange; //First number of its range (inclusive)
public int endRange; //Last number of its range (inclusive)
public string Value; //The letter itself
public LetterType Type; //Vowel or consonant
}
#endregion
/// <summary>
/// Returns the value of a random digram of the specified type. Each
/// digram of that type is chosen with a probability proportional to the
/// width of its [iniRange, endRange] interval.
/// </summary>
/// <param name="type">The type of digram the caller needs</param>
/// <returns>The digram VALUE ("th", for example)</returns>
/// <exception cref="InvalidOperationException">
/// No digram of the requested type is loaded
/// </exception>
static string getDigram(DigramType type)
{
    //Add up the widths of the candidate ranges instead of relying on a
    //hard-coded total. Drawing only among digrams of the wanted type gives
    //the same distribution as drawing among all of them and retrying on a
    //type mismatch, but it always terminates.
    int totalWidth = 0;
    foreach (Digram digram in AllDigrams)
    {
        if (digram.Type == type)
            totalWidth += digram.endRange - digram.iniRange + 1;
    }

    //Without this check an empty table would end in an endless retry
    //loop or a null result
    if (totalWidth == 0)
        throw new InvalidOperationException(
            "No digram of type " + type + " is loaded; call LoadDigrams() first.");

    //Next(max) returns 0 .. max - 1, i.e. exactly one slot per unit of width
    int pick = rSeed.Next(totalWidth);
    foreach (Digram digram in AllDigrams)
    {
        if (digram.Type != type)
            continue;

        int width = digram.endRange - digram.iniRange + 1;
        if (pick < width)
            return digram.Value;

        //Not this one: skip its slots and keep walking
        pick -= width;
    }

    //Not reachable while the table is unchanged between the two passes
    throw new InvalidOperationException(
        "The digram table changed while a digram was being selected.");
}
/// <summary>
/// Returns the value of a random single letter of the specified type.
/// Each letter of that type is chosen with a probability proportional to
/// the width of its [iniRange, endRange] interval.
/// </summary>
/// <param name="type">The type of letter the caller needs</param>
/// <returns>The letter VALUE ("a", for example)</returns>
/// <exception cref="InvalidOperationException">
/// No letter of the requested type is loaded
/// </exception>
static string getLetter(LetterType type)
{
    //Add up the widths of the candidate ranges instead of relying on a
    //hard-coded total. Drawing only among letters of the wanted type gives
    //the same distribution as drawing among all of them and retrying on a
    //type mismatch, but it always terminates.
    int totalWidth = 0;
    foreach (Letter letter in AllLetters)
    {
        if (letter.Type == type)
            totalWidth += letter.endRange - letter.iniRange + 1;
    }

    //Without this check an empty table would end in an endless retry
    //loop or a null result
    if (totalWidth == 0)
        throw new InvalidOperationException(
            "No letter of type " + type + " is loaded; call LoadLetters() first.");

    //Next(max) returns 0 .. max - 1, i.e. exactly one slot per unit of width
    int pick = rSeed.Next(totalWidth);
    foreach (Letter letter in AllLetters)
    {
        if (letter.Type != type)
            continue;

        int width = letter.endRange - letter.iniRange + 1;
        if (pick < width)
            return letter.Value;

        //Not this one: skip its slots and keep walking
        pick -= width;
    }

    //Not reachable while the table is unchanged between the two passes
    throw new InvalidOperationException(
        "The letter table changed while a letter was being selected.");
}
/// <summary>
/// Checks whether a given single letter is a vowel (a, e, i, o or u,
/// in either case). "y" is treated as a consonant.
/// </summary>
/// <param name="checkLetter">The letter to check</param>
/// <returns>
/// True if vowel; false for anything else, including null, an empty
/// string or a string longer than one letter
/// </returns>
static bool IsAVowel(string checkLetter)
{
    //Nothing to examine: report "not a vowel" rather than failing on null
    if (string.IsNullOrEmpty(checkLetter))
        return false;

    //Invariant lower-casing: with the culture-sensitive ToLower() a
    //Turkish culture turns "I" into a dotless "ı", which would not match
    switch (checkLetter.ToLowerInvariant())
    {
        case "a":
        case "e":
        case "i":
        case "o":
        case "u":
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Fills AllLetters with the 26 letters of the English alphabet, using the
/// frequency-of-use table published by the University of Bristol
/// Department of Computer Science (see the link below).
///
/// Since Random deals with integers, each percentage is multiplied by 100;
/// the 26 values add up to 10,000. Every letter receives an inclusive
/// range [iniRange, endRange] of width (frequency + 1), and each range
/// starts right after the previous one ends, so the ranges run from
/// 0 to 10,025 (10,000 + the 26 letters - 1).
///
/// LoadDigrams() builds its ranges the same way, but they only run from
/// 0 to 5,548: the digram table holds just 60 digrams, whose percentages
/// add up to 54.89 rather than 100.
/// </summary>
static void LoadLetters()
{
    // http://www.cs.bris.ac.uk/Teaching/Resources/COMS30124/Labs/freq.html
    // Percentage Frequency of Single Letters
    //
    // E 12.31 L 4.03 B 1.62
    // T 9.59 D 3.65 G 1.61
    // A 8.05 C 3.20 V 0.93
    // O 7.94 U 3.10 K 0.52
    // N 7.19 P 2.29 Q 0.20
    // I 7.18 F 2.28 X 0.20
    // S 6.59 M 2.25 J 0.10
    // R 6.03 W 2.03 Z 0.09
    // H 5.14 Y 1.88

    //The table above read row by row; symbols[n] belongs to weights[n]
    string[] symbols =
    {
        "e", "l", "b",
        "t", "d", "g",
        "a", "c", "v",
        "o", "u", "k",
        "n", "p", "q",
        "i", "f", "x",
        "s", "m", "j",
        "r", "w", "z",
        "h", "y"
    };
    int[] weights =
    {
        1231, 403, 162,
        959, 365, 161,
        805, 320, 93,
        794, 310, 52,
        719, 229, 20,
        718, 228, 20,
        659, 225, 10,
        603, 203, 9,
        514, 188
    };

    //First number of the range the next letter will receive
    int rangeStart = 0;
    for (int index = 0; index < weights.Length; index++)
    {
        Letter entry = new Letter();
        entry.iniRange = rangeStart;
        entry.endRange = rangeStart + weights[index];

        string symbol = symbols[index];
        entry.Value = symbol;
        if (IsAVowel(symbol))
            entry.Type = LetterType.Vowel;
        else
            entry.Type = LetterType.Consonant;

        AllLetters.Add(entry);

        //The following letter starts one past the end of this one
        rangeStart = entry.endRange + 1;
    }
}
/// <summary>
/// Fills AllDigrams with the 60 digrams of the frequency table below.
/// Each percentage is multiplied by 100 to obtain an integer weight; every
/// digram receives an inclusive range [iniRange, endRange] of width
/// (weight + 1), each range starting right after the previous one ends,
/// and a DigramType derived from its two letters.
/// The 60 weights add up to 5,489, so the ranges run from 0 to 5,548.
/// </summary>
static void LoadDigrams()
{
    // http://www.cs.bris.ac.uk/Teaching/Resources/COMS30124/Labs/freq.html
    //
    // TH 3.15 TO 1.11 SA 0.75 MA 0.56
    // HE 2.51 NT 1.10 HI 0.72 TA 0.56
    // AN 1.72 ED 1.07 LE 0.72 CE 0.55
    // IN 1.69 IS 1.06 SO 0.71 IC 0.55
    // ER 1.54 AR 1.01 AS 0.67 LL 0.55
    // RE 1.48 OU 0.96 NO 0.65 NA 0.54
    // ES 1.45 TE 0.94 NE 0.64 RO 0.54
    // ON 1.45 OF 0.94 EC 0.64 OT 0.53
    // EA 1.31 IT 0.88 IO 0.63 TT 0.53
    // TI 1.28 HA 0.84 RT 0.63 VE 0.53
    // AT 1.24 SE 0.84 CO 0.59 NS 0.51
    // ST 1.21 ET 0.80 BE 0.58 UR 0.49
    // EN 1.20 AL 0.77 DI 0.57 ME 0.48
    // ND 1.18 RI 0.77 LI 0.57 WH 0.48
    // OR 1.13 NG 0.75 RA 0.57 LY 0.47

    //The table above read row by row; symbols[n] belongs to weights[n]
    string[] symbols =
    {
        "th", "to", "sa", "ma",
        "he", "nt", "hi", "ta",
        "an", "ed", "le", "ce",
        "in", "is", "so", "ic",
        "er", "ar", "as", "ll",
        "re", "ou", "no", "na",
        "es", "te", "ne", "ro",
        "on", "of", "ec", "ot",
        "ea", "it", "io", "tt",
        "ti", "ha", "rt", "ve",
        "at", "se", "co", "ns",
        "st", "et", "be", "ur",
        "en", "al", "di", "me",
        "nd", "ri", "li", "wh",
        "or", "ng", "ra", "ly"
    };
    int[] weights =
    {
        315, 111, 75, 56,
        251, 110, 72, 56,
        172, 107, 72, 55,
        169, 106, 71, 55,
        154, 101, 67, 55,
        148, 96, 65, 54,
        145, 94, 64, 54,
        145, 94, 64, 53,
        131, 88, 63, 53,
        128, 84, 63, 53,
        124, 84, 59, 51,
        121, 80, 58, 49,
        120, 77, 57, 48,
        118, 77, 57, 48,
        113, 75, 57, 47
    };

    //First number of the range the next digram will receive
    int rangeStart = 0;
    for (int index = 0; index < weights.Length; index++)
    {
        Digram entry = new Digram();
        entry.iniRange = rangeStart;
        entry.endRange = rangeStart + weights[index];

        string symbol = symbols[index];
        entry.Value = symbol;

        //Classify each of the two letters once, then combine the answers
        bool firstIsVowel = IsAVowel(symbol.Substring(0, 1));
        bool secondIsVowel = IsAVowel(symbol.Substring(1, 1));
        if (firstIsVowel)
        {
            if (secondIsVowel)
                entry.Type = DigramType.TwoVowels;
            else
                entry.Type = DigramType.VowelAndConsonant;
        }
        else
        {
            if (secondIsVowel)
                entry.Type = DigramType.ConsonantAndVowel;
            else
                entry.Type = DigramType.TwoConsonants;
        }

        AllDigrams.Add(entry);

        //The following digram starts one past the end of this one
        rangeStart = entry.endRange + 1;
    }
}
#endregion
}
Colorized by: CarlosAg.CodeColorizer
And here’s an example of the surnames it makes:
- Andaso
- Estecon
- Ovloso
- Eleasawh
- Athene
- Tertore
- Setolyiis
- Face
- Lirsuco
- Rofsender





Random (and fake) surname generator
You’ve been kicked (a good thing) - Trackback from DotNetKicks.com
Trackback by DotNetKicks.com — Fri 12 January, 2007 @ 19:04
Code Puzzle #2 - Generate random fake surnames - Recap
Code Puzzle #2 posed the following task: Write a simple function which generates fake but passable surnames
Trackback by Jon Galloway — Sat 13 January, 2007 @ 06:54
it’s great, fantastic mister !!!
Comment by Rofsender — Mon 28 April, 2008 @ 12:22