git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/translit/si_si

0 / 166 ( 0%)

Commit	Line	Data
	1	# ***************************************************************************
	2	# *
	3	# * Copyright (C) 2004-2016, International Business Machines
	4	# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
	5	# *
	6	# ***************************************************************************
	7	# File: si_si_FONIPA.txt
	8	# Generated from CLDR
	9	#
	10
	11	# Sinhala pronunciation rules
	12	#
	13	# Output
	14	# k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
	15	# ə əː a aː æ æː i iː u uː e eː o oː
	16	#
	17	# References
	18	# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
	19	# Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
	20	# Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
	21	# pages 890–897. http://www.aclweb.org/anthology/P06-2114
	22	# Simplify ya + yansaya to plain ya after a consonant + virama (optionally followed by ZWJ).
	23	[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCA‍ය → ය; # NOTE: an invisible joiner character sits between the virama escape and the second ය; keep it when editing
	24	# Delete ZWNJ and ZWJ to simplify further processing.
	25	\u200C → ; # ZWNJ
	26	\u200D → ; # ZWJ
	27	# Insert a schwa after every consonant that is not followed by a dependent vowel
	28	# or virama.
	29	::Null; # pass boundary: the rule below runs on the text left by the joiner clean-up rules above
	30	([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; # $1 is the matched consonant; the set after } is lookahead only and is not consumed
	31	# Pronunciation rules proper.
	32	::Null; # new pass: from here on Sinhala letters and signs are mapped to IPA symbols
	33	# fප is an alternative spelling of ෆ.
	34	# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
	35	# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
	36	[Ff]ප → f; # the letter F or f followed by ප is consumed as a single /f/
	37	# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
	38	# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
	39	# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
	40	# or in zස\u0DD3බ\u0DCA‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zස\u0DD3බ\u0DCA‍රා].
	41	[Zz]ස → z; # the letter Z or z followed by ස is consumed as a single /z/
	42	ං → ŋ; # anusvaraya
	43	o → ŋ; # common substitution for anusvaraya
	44	ඃ ([\u0D9A-\u0DC6]) → \| $1 \u0DCA $1; # before a consonant: emit consonant + virama + consonant (gemination). TODO: check which consonants geminate. NOTE(review): confirm the escaped bar is meant as the cursor marker, so the doubled consonant is re-read by the rules below, and not as a literal bar
	45	ඃ → h; # any other visargaya
	46	අ → a; # independent vowel letters start here
	47	ආ → aː;
	48	ඇ → æ;
	49	ඈ → æː;
	50	ඉ → i;
	51	ඊ → iː;
	52	උ → u;
	53	ඌ → uː;
	54	ඍ → ri; # NOTE(review): short form is ri here, but the dependent sign ෘ maps to ru and the long form ඎ to ruː; confirm this is intended
	55	ඎ → ruː;
	56	ඏ → ilu; # NOTE(review): the dependent signs ෟ / ෳ map to lu / luː without the leading i; confirm this is intended
	57	ඐ → iluː;
	58	එ → e;
	59	ඒ → eː;
	60	ඓ → aj;
	61	ඔ → o;
	62	ඕ → oː;
	63	ඖ → aw; # TODO: check if this is correct
	64	ක → k; # consonant letters start here; several letter pairs deliberately share one output symbol
	65	ඛ → k; # same output as ක
	66	ග → ɡ;
	67	ඝ → ɡ; # same output as ග
	68	ඞ → ŋ;
	69	ඟ → ᵑɡ;
	70	ච → c;
	71	ඡ → c; # same output as ච
	72	ජ → ɟ;
	73	ඣ → ɟ; # same output as ජ
	74	ඤ → ɲ;
	75	ඥ → kɲ; # TODO: double-check
	76	ඦ → ɟ; # NOTE(review): unlike ඟ / ඬ / ඳ / ඹ, the output carries no superscript nasal; confirm this is intended
	77	ට → ʈ;
	78	ඨ → ʈ; # same output as ට
	79	ඩ → ɖ;
	80	ඪ → ɖ; # same output as ඩ
	81	ණ → n; # same output as න
	82	ඬ → ⁿɖ;
	83	ත → t;
	84	ථ → t; # same output as ත
	85	ද → d;
	86	ධ → d; # same output as ද
	87	න → n;
	88	ඳ → ⁿd;
	89	ප → p;
	90	ඵ → p; # same output as ප
	91	බ → b;
	92	භ → b; # same output as බ
	93	ම → m;
	94	ඹ → ᵐb;
	95	ය → j;
	96	ර → r;
	97	ල → l;
	98	ව → w;
	99	ශ → ʃ;
	100	ෂ → ʃ; # same output as ශ
	101	ස → s;
	102	හ → h;
	103	ළ → l; # same output as ල
	104	ෆ → f;
	105	\u0DCA → ; # delete virama (schwa insertion already ran in the earlier pass, so the virama is no longer needed)
	106	ා → aː; # dependent vowel signs start here
	107	ැ → æ;
	108	ෑ → æː;
	109	\u0DD2 → i; # vowel sign written as an escape
	110	\u0DD3 → iː; # vowel sign written as an escape
	111	\u0DD4 → u; # vowel sign written as an escape
	112	\u0DD6 → uː; # vowel sign written as an escape
	113	ෘ → ru;
	114	ෙ → e;
	115	ේ → eː;
	116	ෛ → aj;
	117	ො → o;
	118	ෝ → oː;
	119	ෞ → aw; # TODO: check if this is correct
	120	ෟ → lu;
	121	ෲ → ruː;
	122	ෳ → luː;
	123	# Heuristics for turning /ə/ into /a/. Based on [1].
	124	$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; # consonant symbols emitted by the rules above; {…} entries are multi-character strings
	125	$s=[:^L:]; # any non-letter; used in the rules below as the context at a word edge
	126	# Rule #1: a schwa right after the first consonant(s) of a word becomes /a/; the exceptions are listed first so they match before the general rules.
	127	::Null;
	128	$s sv { ə → ə; # exception (a): word-initial "sv" keeps the schwa
	129	$s k { ə } r → ə; # exception (b): word-initial k + schwa before r keeps the schwa
	130	$s $c { ə } $s → ə; # exception (c): a lone consonant + schwa between non-letters keeps the schwa
	131	$s $c $c { ə → a; # general case after two initial consonants
	132	$s $c { ə → a; # general case after one initial consonant
	133	# Rule #2: vowels after a consonant + r cluster.
	134	::Null;
	135	$c r { ə } $c → a; # clause (a) and (b): schwa before a consonant becomes a
	136	$c r { a } h → a; # clause (d), exception: a before h is kept; listed before clause (c) so it matches first
	137	$c r { a } $c → ə; # clause (c): a before any other consonant becomes schwa
	138	# Rule #3
	139	# The paper is unclear about what this rule means. The interpretation here
	140	# assumes that "preceded" in the paper is a typo and should be read "followed".
	141	::Null;
	142	[a e æ o ə] h { ə → a; # a schwa after (a, e, æ, o or ə) + h becomes a
	143	# Rules #4 through #7 (all in one pass; earlier rules take precedence at the same position)
	144	::Null;
	145	ə } $c $c → a; # Rule #4: schwa before two consonants
	146	ə } [rbɖʈ] $s → ə; # Rule #5 exception: schwa is kept before word-final r, b, ɖ or ʈ
	147	ə } $c $s → a; # Rule #5: schwa before any other word-final consonant
	148	ə } ji $s → a; # Rule #6: schwa before word-final "ji"
	149	k { ə } [rl] u → a; # Rule #7: schwa between k and ru / lu
	150	# Rule #8: turns /a/ back into /ə/ after a word-initial k in the contexts below.
	151	# Note that the paper doesn't say explicitly that this rule should be
	152	# anchored at the beginning of a word, but the remarks before the rules
	153	# seem to imply this.
	154	::Null;
	155	$s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/.
	156	$s k { a } le[mh][ui] → ə; # before "le" + m or h + u or i
	157	$s k { alə } h[ui] → əle; # rewrites the whole "alə" span, not just the first vowel
	158	$s k { a } lə → ə; # before "lə" in any remaining context
	159	# Diphthongs: collapse vowel + glide + vowel sequences left by the letter-by-letter mapping.
	160	::Null;
	161	www+ → ww; # three or more w collapse to two; example word: යෞව\u0DCAවන
	162	[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; # after a vowel other than u, "wu" loses its u
	163	əji → aj;
	164	iji → iː; # perhaps: ij
	165	[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; # after a vowel other than i, "ji" loses its i
	166