diff --git a/.gitattributes b/.gitattributes
index c622f57b8..65dbf4920 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,3 +7,7 @@
# Have Github ignore js vendored files.
# https://github.com/gramps-project/gramps/tree/master/data/javascript
#data/javascript/*.js linguist-vendored
+
+# don't mess with line endings for Gedcom files
+*.ged binary
+*.GED binary
diff --git a/data/tests/ANSEL_CR.GED b/data/tests/ANSEL_CR.GED
new file mode 100644
index 000000000..ea588a26e
--- /dev/null
+++ b/data/tests/ANSEL_CR.GED
@@ -0,0 +1 @@
+0 HEAD
1 CHAR ANSEL
1 SOUR REGISTERED_SOURCE_NAME
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 NOTE This GEDCOM transmission contains a charcter set test. It consists
2 CONT of a single family (two parents, many children). The parents are empty
2 CONT in the ANSEL version of the transmission. The children contain the
2 CONT combined letters and the special charcters (value > 128).
2 CONT The NAME tag of each 'person' is the name of the characters tested
2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the
2 CONT test-strings.
2 CONT The first children contain special characters. Here the test string
2 CONT is 'character name (test character), ...' where 'character name'
2 CONT is the name of the character (like 'british pound') and
2 CONT 'test character' is a single byte representing this character
2 CONT in ANSEL.
2 CONT The last children contain combined characters. The name tag gives
2 CONT the name of the non-spacing character tested within the 'person'.
2 CONT Within the name the hex-values of the non-spacing character is given
2 CONT in ANSEL and UNICODE. The test strings contain the whole latin
2 CONT alphabet combined with this non-spacing character: captial letters
2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag.
2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC
2 CONT tag contains all 26 capital letters with a small ring on top.
2 CONT Note: Not all charcters can be displayed on all computers.
2 CONT This strongly depends on the installed fonts and codepages.
2 CONT Many of the combined characters generated here do not even have
2 CONT a UNICDOE code point!
2 CONT This file based mainly on the GEDCOM 5.5 specification
2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip)
2 CONT and on an updated ANSEL description in:
2 CONT http://www.gendex.com/gedcom55/55gcappd.htm
1 SUBM @SUBMITTER@
1 DATE 20 JAN 1998
0 @SUBMITTER@ SUBM
1 NAME /H. Eichmann/
1 ADDR email: h.eichmann@@gmx.de
0 @FATHER@ INDI
1 NAME /cyrillic (not possible in ANSEL)/
1 SEX M
1 FAMS @FAMILY@
0 @MOTHER@ INDI
1 NAME /greek (not possible in ANSEL)/
1 SEX F
1 FAMS @FAMILY@
0 @CHILD0@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 0/
1 BIRT
2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase ()
1 DEAT
2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat ()
0 @CHILD1@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 1/
1 BIRT
2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase ()
1 DEAT
2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase ()
0 @CHILD2@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 2/
1 BIRT
2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak ()
1 DEAT
2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase ()
0 @CHILD3@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 3/
1 BIRT
2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol ()
1 DEAT
2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet ()
0 @CHILD4@ INDI
1 FAMC @FAMILY@
1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD5@ INDI
1 FAMC @FAMILY@
1 NAME code: E1 (Unicode: grave, 0300)/grave accent/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD6@ INDI
1 FAMC @FAMILY@
1 NAME code: E2 (Unicode: acute, 0301)/acute accent/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD7@ INDI
1 FAMC @FAMILY@
1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD8@ INDI
1 FAMC @FAMILY@
1 NAME code: E4 (Unicode: tilde, 0303)/tilde/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD9@ INDI
1 FAMC @FAMILY@
1 NAME code: E5 (Unicode: macron, 0304)/macron/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD10@ INDI
1 FAMC @FAMILY@
1 NAME code: E6 (Unicode: breve, 0306)/breve/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD11@ INDI
1 FAMC @FAMILY@
1 NAME code: E7 (Unicode: dot above, 0307)/dot above/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD12@ INDI
1 FAMC @FAMILY@
1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD13@ INDI
1 FAMC @FAMILY@
1 NAME code: E9 (Unicode: caron, 030C)/hacek/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD14@ INDI
1 FAMC @FAMILY@
1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD15@ INDI
1 FAMC @FAMILY@
1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD16@ INDI
1 FAMC @FAMILY@
1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD17@ INDI
1 FAMC @FAMILY@
1 NAME code: ED (Unicode: comma above right, 0315)/high comma, off center/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD18@ INDI
1 FAMC @FAMILY@
1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD19@ INDI
1 FAMC @FAMILY@
1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD20@ INDI
1 FAMC @FAMILY@
1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD21@ INDI
1 FAMC @FAMILY@
1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD22@ INDI
1 FAMC @FAMILY@
1 NAME code: F2 (Unicode: dot below, 0323)/dot below/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD23@ INDI
1 FAMC @FAMILY@
1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD24@ INDI
1 FAMC @FAMILY@
1 NAME code: F4 (Unicode: ring below, 0325)/circle below/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD25@ INDI
1 FAMC @FAMILY@
1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD26@ INDI
1 FAMC @FAMILY@
1 NAME code: F6 (Unicode: line below, 0332)/underscore/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD27@ INDI
1 FAMC @FAMILY@
1 NAME code: F7 (Unicode: comma below, 0326)/left hook/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD28@ INDI
1 FAMC @FAMILY@
1 NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD29@ INDI
1 FAMC @FAMILY@
1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD30@ INDI
1 FAMC @FAMILY@
1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD31@ INDI
1 FAMC @FAMILY@
1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @CHILD32@ INDI
1 FAMC @FAMILY@
1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/
1 BIRT
2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
1 DEAT
2 PLAC abcdefghijklmnopqrstuvwxyz
0 @FAMILY@ FAM
1 HUSB @FATHER@
1 WIFE @MOTHER@
1 CHIL @CHILD0@
1 CHIL @CHILD1@
1 CHIL @CHILD2@
1 CHIL @CHILD3@
1 CHIL @CHILD4@
1 CHIL @CHILD5@
1 CHIL @CHILD6@
1 CHIL @CHILD7@
1 CHIL @CHILD8@
1 CHIL @CHILD9@
1 CHIL @CHILD10@
1 CHIL @CHILD11@
1 CHIL @CHILD12@
1 CHIL @CHILD13@
1 CHIL @CHILD14@
1 CHIL @CHILD15@
1 CHIL @CHILD16@
1 CHIL @CHILD17@
1 CHIL @CHILD18@
1 CHIL @CHILD19@
1 CHIL @CHILD20@
1 CHIL @CHILD21@
1 CHIL @CHILD22@
1 CHIL @CHILD23@
1 CHIL @CHILD24@
1 CHIL @CHILD25@
1 CHIL @CHILD26@
1 CHIL @CHILD27@
1 CHIL @CHILD28@
1 CHIL @CHILD29@
1 CHIL @CHILD30@
1 CHIL @CHILD31@
1 CHIL @CHILD32@
0 TRLR
\ No newline at end of file
diff --git a/data/tests/ANSEL_CR.gramps b/data/tests/ANSEL_CR.gramps
new file mode 100644
index 000000000..83eb3fbb7
--- /dev/null
+++ b/data/tests/ANSEL_CR.gramps
@@ -0,0 +1,926 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic (not possible in ANSEL)
+
+
+
+
+ F
+
+ greek (not possible in ANSEL)
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: E0 (Unicode: hook above, 0309)
+ low rising tone mark
+
+
+
+
+
+
+ U
+
+ code: E1 (Unicode: grave, 0300)
+ grave accent
+
+
+
+
+
+
+ U
+
+ code: E2 (Unicode: acute, 0301)
+ acute accent
+
+
+
+
+
+
+ U
+
+ code: E3 (Unicode: circumflex, 0302)
+ circumflex accent
+
+
+
+
+
+
+ U
+
+ code: E4 (Unicode: tilde, 0303)
+ tilde
+
+
+
+
+
+
+ U
+
+ code: E5 (Unicode: macron, 0304)
+ macron
+
+
+
+
+
+
+ U
+
+ code: E6 (Unicode: breve, 0306)
+ breve
+
+
+
+
+
+
+ U
+
+ code: E7 (Unicode: dot above, 0307)
+ dot above
+
+
+
+
+
+
+ U
+
+ code: E8 (Unicode: diaeresis, 0308)
+ umlaut (dieresis)
+
+
+
+
+
+
+ U
+
+ code: E9 (Unicode: caron, 030C)
+ hacek
+
+
+
+
+
+
+ U
+
+ code: EA (Unicode: ring above, 030A)
+ circle above (angstrom)
+
+
+
+
+
+
+ U
+
+ code: EB (Unicode: ligature left half, FE20)
+ ligature, left half
+
+
+
+
+
+
+ U
+
+ code: EC (Unicode: ligature right half, FE21)
+ ligature, right half
+
+
+
+
+
+
+ U
+
+ code: ED (Unicode: comma above right, 0315)
+ high comma, off center
+
+
+
+
+
+
+ U
+
+ code: EE (Unicode: double acute, 030B)
+ double acute accent
+
+
+
+
+
+
+ U
+
+ code: EF (Unicode: candrabindu, 0310)
+ candrabindu
+
+
+
+
+
+
+ U
+
+ code: F0 (Unicode: cedilla, 0327)
+ cedilla
+
+
+
+
+
+
+ U
+
+ code: F1 (Unicode: ogonek, 0328)
+ right hook
+
+
+
+
+
+
+ U
+
+ code: F2 (Unicode: dot below, 0323)
+ dot below
+
+
+
+
+
+
+ U
+
+ code: F3 (Unicode: diaeresis below, 0324)
+ double dot below
+
+
+
+
+
+
+ U
+
+ code: F4 (Unicode: ring below, 0325)
+ circle below
+
+
+
+
+
+
+ U
+
+ code: F5 (Unicode: double low line, 0333)
+ double underscore
+
+
+
+
+
+
+ U
+
+ code: F6 (Unicode: line below, 0332)
+ underscore
+
+
+
+
+
+
+ U
+
+ code: F7 (Unicode: comma below, 0326)
+ left hook
+
+
+
+
+
+
+ U
+
+ code: F8 (Unicode: left half ring below, 031C)
+ right cedilla
+
+
+
+
+
+
+ U
+
+ code: F9 (Unicode: breve below, 032E)
+ half circle below
+
+
+
+
+
+
+ U
+
+ code: FA (Unicode: double tilde left half, FE22)
+ double tilde, left half
+
+
+
+
+
+
+ U
+
+ code: FB (Unicode: double tilde right half, FE23)
+ double tilde, right half
+
+
+
+
+
+
+ U
+
+ code: FE (Unicode: comma above, 0313)
+ high comma, centered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ)
+
+
+
+ ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭)
+
+
+
+ patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư)
+
+
+
+ alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ)
+
+
+
+ thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ)
+
+
+
+ dotless i - lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư)
+
+
+
+ degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©)
+
+
+
+ musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß)
+
+
+
+ ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉
+
+
+
+ ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉
+
+
+
+ ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀
+
+
+
+ àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀
+
+
+
+ ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ
+
+
+
+ áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź
+
+
+
+ ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ
+
+
+
+ âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ
+
+
+
+ ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃
+
+
+
+ ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃
+
+
+
+ ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄
+
+
+
+ āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄
+
+
+
+ ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆
+
+
+
+ ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆
+
+
+
+ ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ
+
+
+
+ ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż
+
+
+
+ ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈
+
+
+
+ äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈
+
+
+
+ ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž
+
+
+
+ ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž
+
+
+
+ ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊
+
+
+
+ åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊
+
+
+
+ A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠
+
+
+
+ a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠
+
+
+
+ A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡
+
+
+
+ a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡
+
+
+
+ A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕
+
+
+
+ a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕
+
+
+
+ A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋
+
+
+
+ a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋
+
+
+
+ A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐
+
+
+
+ a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐
+
+
+
+ A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧
+
+
+
+ a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧
+
+
+
+ ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨
+
+
+
+ ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨
+
+
+
+ ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ
+
+
+
+ ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ
+
+
+
+ A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤
+
+
+
+ a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤
+
+
+
+ ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥
+
+
+
+ ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥
+
+
+
+ A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳
+
+
+
+ a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳
+
+
+
+ A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲
+
+
+
+ a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲
+
+
+
+ A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦
+
+
+
+ a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦
+
+
+
+ A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜
+
+
+
+ a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜
+
+
+
+ A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮
+
+
+
+ a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮
+
+
+
+ A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢
+
+
+
+ a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢
+
+
+
+ A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣
+
+
+
+ a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣
+
+
+
+ A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓
+
+
+
+ a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓
+
+
+
+
diff --git a/data/tests/ANSEL_CRLF.GED b/data/tests/ANSEL_CRLF.GED
new file mode 100644
index 000000000..fe0b4b4e5
--- /dev/null
+++ b/data/tests/ANSEL_CRLF.GED
@@ -0,0 +1,315 @@
+0 HEAD
+1 CHAR ANSEL
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are empty
+2 CONT in the ANSEL version of the transmission. The children contain the
+2 CONT combined letters and the special charcters (value > 128).
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the
+2 CONT test-strings.
+2 CONT The first children contain special characters. Here the test string
+2 CONT is 'character name (test character), ...' where 'character name'
+2 CONT is the name of the character (like 'british pound') and
+2 CONT 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT in ANSEL and UNICODE. The test strings contain the whole latin
+2 CONT alphabet combined with this non-spacing character: captial letters
+2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag.
+2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC
+2 CONT tag contains all 26 capital letters with a small ring on top.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT Many of the combined characters generated here do not even have
+2 CONT a UNICDOE code point!
+2 CONT This file based mainly on the GEDCOM 5.5 specification
+2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip)
+2 CONT and on an updated ANSEL description in:
+2 CONT http://www.gendex.com/gedcom55/55gcappd.htm
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic (not possible in ANSEL)/
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek (not possible in ANSEL)/
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase ()
+1 DEAT
+2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat ()
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase ()
+1 DEAT
+2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase ()
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak ()
+1 DEAT
+2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase ()
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol ()
+1 DEAT
+2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet ()
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E1 (Unicode: grave, 0300)/grave accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E2 (Unicode: acute, 0301)/acute accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E4 (Unicode: tilde, 0303)/tilde/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E5 (Unicode: macron, 0304)/macron/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E6 (Unicode: breve, 0306)/breve/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E7 (Unicode: dot above, 0307)/dot above/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E9 (Unicode: caron, 030C)/hacek/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: ED (Unicode: comma above right, 0315)/high comma, off center/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F2 (Unicode: dot below, 0323)/dot below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F4 (Unicode: ring below, 0325)/circle below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F6 (Unicode: line below, 0332)/underscore/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F7 (Unicode: comma below, 0326)/left hook/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/ANSEL_CRLF.gramps b/data/tests/ANSEL_CRLF.gramps
new file mode 100644
index 000000000..83eb3fbb7
--- /dev/null
+++ b/data/tests/ANSEL_CRLF.gramps
@@ -0,0 +1,926 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic (not possible in ANSEL)
+
+
+
+
+ F
+
+ greek (not possible in ANSEL)
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: E0 (Unicode: hook above, 0309)
+ low rising tone mark
+
+
+
+
+
+
+ U
+
+ code: E1 (Unicode: grave, 0300)
+ grave accent
+
+
+
+
+
+
+ U
+
+ code: E2 (Unicode: acute, 0301)
+ acute accent
+
+
+
+
+
+
+ U
+
+ code: E3 (Unicode: circumflex, 0302)
+ circumflex accent
+
+
+
+
+
+
+ U
+
+ code: E4 (Unicode: tilde, 0303)
+ tilde
+
+
+
+
+
+
+ U
+
+ code: E5 (Unicode: macron, 0304)
+ macron
+
+
+
+
+
+
+ U
+
+ code: E6 (Unicode: breve, 0306)
+ breve
+
+
+
+
+
+
+ U
+
+ code: E7 (Unicode: dot above, 0307)
+ dot above
+
+
+
+
+
+
+ U
+
+ code: E8 (Unicode: diaeresis, 0308)
+ umlaut (dieresis)
+
+
+
+
+
+
+ U
+
+ code: E9 (Unicode: caron, 030C)
+ hacek
+
+
+
+
+
+
+ U
+
+ code: EA (Unicode: ring above, 030A)
+ circle above (angstrom)
+
+
+
+
+
+
+ U
+
+ code: EB (Unicode: ligature left half, FE20)
+ ligature, left half
+
+
+
+
+
+
+ U
+
+ code: EC (Unicode: ligature right half, FE21)
+ ligature, right half
+
+
+
+
+
+
+ U
+
+ code: ED (Unicode: comma above right, 0315)
+ high comma, off center
+
+
+
+
+
+
+ U
+
+ code: EE (Unicode: double acute, 030B)
+ double acute accent
+
+
+
+
+
+
+ U
+
+ code: EF (Unicode: candrabindu, 0310)
+ candrabindu
+
+
+
+
+
+
+ U
+
+ code: F0 (Unicode: cedilla, 0327)
+ cedilla
+
+
+
+
+
+
+ U
+
+ code: F1 (Unicode: ogonek, 0328)
+ right hook
+
+
+
+
+
+
+ U
+
+ code: F2 (Unicode: dot below, 0323)
+ dot below
+
+
+
+
+
+
+ U
+
+ code: F3 (Unicode: diaeresis below, 0324)
+ double dot below
+
+
+
+
+
+
+ U
+
+ code: F4 (Unicode: ring below, 0325)
+ circle below
+
+
+
+
+
+
+ U
+
+ code: F5 (Unicode: double low line, 0333)
+ double underscore
+
+
+
+
+
+
+ U
+
+ code: F6 (Unicode: line below, 0332)
+ underscore
+
+
+
+
+
+
+ U
+
+ code: F7 (Unicode: comma below, 0326)
+ left hook
+
+
+
+
+
+
+ U
+
+ code: F8 (Unicode: left half ring below, 031C)
+ right cedilla
+
+
+
+
+
+
+ U
+
+ code: F9 (Unicode: breve below, 032E)
+ half circle below
+
+
+
+
+
+
+ U
+
+ code: FA (Unicode: double tilde left half, FE22)
+ double tilde, left half
+
+
+
+
+
+
+ U
+
+ code: FB (Unicode: double tilde right half, FE23)
+ double tilde, right half
+
+
+
+
+
+
+ U
+
+ code: FE (Unicode: comma above, 0313)
+ high comma, centered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ)
+
+
+
+ ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭)
+
+
+
+ patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư)
+
+
+
+ alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ)
+
+
+
+ thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ)
+
+
+
+ dotless i - lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư)
+
+
+
+ degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©)
+
+
+
+ musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß)
+
+
+
+ ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉
+
+
+
+ ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉
+
+
+
+ ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀
+
+
+
+ àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀
+
+
+
+ ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ
+
+
+
+ áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź
+
+
+
+ ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ
+
+
+
+ âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ
+
+
+
+ ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃
+
+
+
+ ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃
+
+
+
+ ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄
+
+
+
+ āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄
+
+
+
+ ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆
+
+
+
+ ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆
+
+
+
+ ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ
+
+
+
+ ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż
+
+
+
+ ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈
+
+
+
+ äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈
+
+
+
+ ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž
+
+
+
+ ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž
+
+
+
+ ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊
+
+
+
+ åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊
+
+
+
+ A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠
+
+
+
+ a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠
+
+
+
+ A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡
+
+
+
+ a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡
+
+
+
+ A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕
+
+
+
+ a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕
+
+
+
+ A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋
+
+
+
+ a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋
+
+
+
+ A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐
+
+
+
+ a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐
+
+
+
+ A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧
+
+
+
+ a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧
+
+
+
+ ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨
+
+
+
+ ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨
+
+
+
+ ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ
+
+
+
+ ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ
+
+
+
+ A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤
+
+
+
+ a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤
+
+
+
+ ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥
+
+
+
+ ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥
+
+
+
+ A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳
+
+
+
+ a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳
+
+
+
+ A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲
+
+
+
+ a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲
+
+
+
+ A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦
+
+
+
+ a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦
+
+
+
+ A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜
+
+
+
+ a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜
+
+
+
+ A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮
+
+
+
+ a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮
+
+
+
+ A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢
+
+
+
+ a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢
+
+
+
+ A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣
+
+
+
+ a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣
+
+
+
+ A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓
+
+
+
+ a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓
+
+
+
+
diff --git a/data/tests/ANSEL_LF.GED b/data/tests/ANSEL_LF.GED
new file mode 100644
index 000000000..5996df047
--- /dev/null
+++ b/data/tests/ANSEL_LF.GED
@@ -0,0 +1,315 @@
+0 HEAD
+1 CHAR ANSEL
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are empty
+2 CONT in the ANSEL version of the transmission. The children contain the
+2 CONT combined letters and the special charcters (value > 128).
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the
+2 CONT test-strings.
+2 CONT The first children contain special characters. Here the test string
+2 CONT is 'character name (test character), ...' where 'character name'
+2 CONT is the name of the character (like 'british pound') and
+2 CONT 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT in ANSEL and UNICODE. The test strings contain the whole latin
+2 CONT alphabet combined with this non-spacing character: captial letters
+2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag.
+2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC
+2 CONT tag contains all 26 capital letters with a small ring on top.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT Many of the combined characters generated here do not even have
+2 CONT a UNICDOE code point!
+2 CONT This file based mainly on the GEDCOM 5.5 specification
+2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip)
+2 CONT and on an updated ANSEL description in:
+2 CONT http://www.gendex.com/gedcom55/55gcappd.htm
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic (not possible in ANSEL)/
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek (not possible in ANSEL)/
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase ()
+1 DEAT
+2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat ()
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase ()
+1 DEAT
+2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase ()
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak ()
+1 DEAT
+2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase ()
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol ()
+1 DEAT
+2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet ()
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E1 (Unicode: grave, 0300)/grave accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E2 (Unicode: acute, 0301)/acute accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E4 (Unicode: tilde, 0303)/tilde/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E5 (Unicode: macron, 0304)/macron/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E6 (Unicode: breve, 0306)/breve/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E7 (Unicode: dot above, 0307)/dot above/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: E9 (Unicode: caron, 030C)/hacek/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: ED (Unicode: comma above right, 0315)/high comma, off center/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F2 (Unicode: dot below, 0323)/dot below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F4 (Unicode: ring below, 0325)/circle below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F6 (Unicode: line below, 0332)/underscore/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F7 (Unicode: comma below, 0326)/left hook/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/
+1 BIRT
+2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ
+1 DEAT
+2 PLAC abcdefghijklmnopqrstuvwxyz
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/ANSEL_LF.gramps b/data/tests/ANSEL_LF.gramps
new file mode 100644
index 000000000..83eb3fbb7
--- /dev/null
+++ b/data/tests/ANSEL_LF.gramps
@@ -0,0 +1,926 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic (not possible in ANSEL)
+
+
+
+
+ F
+
+ greek (not possible in ANSEL)
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: E0 (Unicode: hook above, 0309)
+ low rising tone mark
+
+
+
+
+
+
+ U
+
+ code: E1 (Unicode: grave, 0300)
+ grave accent
+
+
+
+
+
+
+ U
+
+ code: E2 (Unicode: acute, 0301)
+ acute accent
+
+
+
+
+
+
+ U
+
+ code: E3 (Unicode: circumflex, 0302)
+ circumflex accent
+
+
+
+
+
+
+ U
+
+ code: E4 (Unicode: tilde, 0303)
+ tilde
+
+
+
+
+
+
+ U
+
+ code: E5 (Unicode: macron, 0304)
+ macron
+
+
+
+
+
+
+ U
+
+ code: E6 (Unicode: breve, 0306)
+ breve
+
+
+
+
+
+
+ U
+
+ code: E7 (Unicode: dot above, 0307)
+ dot above
+
+
+
+
+
+
+ U
+
+ code: E8 (Unicode: diaeresis, 0308)
+ umlaut (dieresis)
+
+
+
+
+
+
+ U
+
+ code: E9 (Unicode: caron, 030C)
+ hacek
+
+
+
+
+
+
+ U
+
+ code: EA (Unicode: ring above, 030A)
+ circle above (angstrom)
+
+
+
+
+
+
+ U
+
+ code: EB (Unicode: ligature left half, FE20)
+ ligature, left half
+
+
+
+
+
+
+ U
+
+ code: EC (Unicode: ligature right half, FE21)
+ ligature, right half
+
+
+
+
+
+
+ U
+
+ code: ED (Unicode: comma above right, 0315)
+ high comma, off center
+
+
+
+
+
+
+ U
+
+ code: EE (Unicode: double acute, 030B)
+ double acute accent
+
+
+
+
+
+
+ U
+
+ code: EF (Unicode: candrabindu, 0310)
+ candrabindu
+
+
+
+
+
+
+ U
+
+ code: F0 (Unicode: cedilla, 0327)
+ cedilla
+
+
+
+
+
+
+ U
+
+ code: F1 (Unicode: ogonek, 0328)
+ right hook
+
+
+
+
+
+
+ U
+
+ code: F2 (Unicode: dot below, 0323)
+ dot below
+
+
+
+
+
+
+ U
+
+ code: F3 (Unicode: diaeresis below, 0324)
+ double dot below
+
+
+
+
+
+
+ U
+
+ code: F4 (Unicode: ring below, 0325)
+ circle below
+
+
+
+
+
+
+ U
+
+ code: F5 (Unicode: double low line, 0333)
+ double underscore
+
+
+
+
+
+
+ U
+
+ code: F6 (Unicode: line below, 0332)
+ underscore
+
+
+
+
+
+
+ U
+
+ code: F7 (Unicode: comma below, 0326)
+ left hook
+
+
+
+
+
+
+ U
+
+ code: F8 (Unicode: left half ring below, 031C)
+ right cedilla
+
+
+
+
+
+
+ U
+
+ code: F9 (Unicode: breve below, 032E)
+ half circle below
+
+
+
+
+
+
+ U
+
+ code: FA (Unicode: double tilde left half, FE22)
+ double tilde, left half
+
+
+
+
+
+
+ U
+
+ code: FB (Unicode: double tilde right half, FE23)
+ double tilde, right half
+
+
+
+
+
+
+ U
+
+ code: FE (Unicode: comma above, 0313)
+ high comma, centered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ)
+
+
+
+ ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭)
+
+
+
+ patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư)
+
+
+
+ alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ)
+
+
+
+ thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ)
+
+
+
+ dotless i - lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư)
+
+
+
+ degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©)
+
+
+
+ musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß)
+
+
+
+ ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉
+
+
+
+ ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉
+
+
+
+ ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀
+
+
+
+ àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀
+
+
+
+ ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ
+
+
+
+ áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź
+
+
+
+ ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ
+
+
+
+ âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ
+
+
+
+ ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃
+
+
+
+ ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃
+
+
+
+ ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄
+
+
+
+ āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄
+
+
+
+ ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆
+
+
+
+ ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆
+
+
+
+ ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ
+
+
+
+ ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż
+
+
+
+ ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈
+
+
+
+ äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈
+
+
+
+ ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž
+
+
+
+ ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž
+
+
+
+ ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊
+
+
+
+ åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊
+
+
+
+ A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠
+
+
+
+ a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠
+
+
+
+ A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡
+
+
+
+ a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡
+
+
+
+ A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕
+
+
+
+ a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕
+
+
+
+ A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋
+
+
+
+ a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋
+
+
+
+ A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐
+
+
+
+ a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐
+
+
+
+ A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧
+
+
+
+ a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧
+
+
+
+ ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨
+
+
+
+ ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨
+
+
+
+ ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ
+
+
+
+ ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ
+
+
+
+ A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤
+
+
+
+ a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤
+
+
+
+ ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥
+
+
+
+ ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥
+
+
+
+ A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳
+
+
+
+ a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳
+
+
+
+ A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲
+
+
+
+ a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲
+
+
+
+ A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦
+
+
+
+ a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦
+
+
+
+ A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜
+
+
+
+ a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜
+
+
+
+ A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮
+
+
+
+ a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮
+
+
+
+ A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢
+
+
+
+ a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢
+
+
+
+ A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣
+
+
+
+ a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣
+
+
+
+ A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓
+
+
+
+ a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓
+
+
+
+
diff --git a/data/tests/Latin_1_CR.ged b/data/tests/Latin_1_CR.ged
new file mode 100644
index 000000000..29854f65c
--- /dev/null
+++ b/data/tests/Latin_1_CR.ged
@@ -0,0 +1 @@
+0 HEAD
1 SOUR NOTEPAD++
1 DEST ANY
1 DATE 3 DEC 2010
2 TIME 7:03
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 LANG English
1 CHAR ASCII
0 @I1@ INDI
1 NAME Paul /Culley/
1 SEX M
1 BIRT
2 DATE 1955
1 DEAT
2 DATE 2017
1 NOTE
2 CONT Table of Latin_1, ISO-8859-1 characters
2 CONT 32 [ ]
2 CONT 33 [!]
2 CONT 34 ["]
2 CONT 35 [#]
2 CONT 36 [$]
2 CONT 37 [%]
2 CONT 38 [&]
2 CONT 39 [']
2 CONT 40 [(]
2 CONT 41 [)]
2 CONT 42 [*]
2 CONT 43 [+]
2 CONT 44 [,]
2 CONT 45 [-]
2 CONT 46 [.]
2 CONT 47 [/]
2 CONT 48 [0]
2 CONT 49 [1]
2 CONT 50 [2]
2 CONT 51 [3]
2 CONT 52 [4]
2 CONT 53 [5]
2 CONT 54 [6]
2 CONT 55 [7]
2 CONT 56 [8]
2 CONT 57 [9]
2 CONT 58 [:]
2 CONT 59 [;]
2 CONT 60 [<]
2 CONT 61 [=]
2 CONT 62 [>]
2 CONT 63 [?]
2 CONT 64 [@]
2 CONT 65 [A]
2 CONT 66 [B]
2 CONT 67 [C]
2 CONT 68 [D]
2 CONT 69 [E]
2 CONT 70 [F]
2 CONT 71 [G]
2 CONT 72 [H]
2 CONT 73 [I]
2 CONT 74 [J]
2 CONT 75 [K]
2 CONT 76 [L]
2 CONT 77 [M]
2 CONT 78 [N]
2 CONT 79 [O]
2 CONT 80 [P]
2 CONT 81 [Q]
2 CONT 82 [R]
2 CONT 83 [S]
2 CONT 84 [T]
2 CONT 85 [U]
2 CONT 86 [V]
2 CONT 87 [W]
2 CONT 88 [X]
2 CONT 89 [Y]
2 CONT 90 [Z]
2 CONT 91 [[]
2 CONT 92 [\]
2 CONT 93 []]
2 CONT 94 [^]
2 CONT 95 [_]
2 CONT 96 [`]
2 CONT 97 [a]
2 CONT 98 [b]
2 CONT 99 [c]
2 CONT 100 [d]
2 CONT 101 [e]
2 CONT 102 [f]
2 CONT 103 [g]
2 CONT 104 [h]
2 CONT 105 [i]
2 CONT 106 [j]
2 CONT 107 [k]
2 CONT 108 [l]
2 CONT 109 [m]
2 CONT 110 [n]
2 CONT 111 [o]
2 CONT 112 [p]
2 CONT 113 [q]
2 CONT 114 [r]
2 CONT 115 [s]
2 CONT 116 [t]
2 CONT 117 [u]
2 CONT 118 [v]
2 CONT 119 [w]
2 CONT 120 [x]
2 CONT 121 [y]
2 CONT 122 [z]
2 CONT 123 [{]
2 CONT 124 [|]
2 CONT 125 [}]
2 CONT 126 [~]
2 CONT 160 []
2 CONT 161 []
2 CONT 162 []
2 CONT 163 []
2 CONT 164 []
2 CONT 165 []
2 CONT 166 []
2 CONT 167 []
2 CONT 168 []
2 CONT 169 []
2 CONT 170 []
2 CONT 171 []
2 CONT 172 []
2 CONT 173 []
2 CONT 174 []
2 CONT 175 []
2 CONT 176 []
2 CONT 177 []
2 CONT 178 []
2 CONT 179 []
2 CONT 180 []
2 CONT 181 []
2 CONT 182 []
2 CONT 183 []
2 CONT 184 []
2 CONT 185 []
2 CONT 186 []
2 CONT 187 []
2 CONT 188 []
2 CONT 189 []
2 CONT 190 []
2 CONT 191 []
2 CONT 192 []
2 CONT 193 []
2 CONT 194 []
2 CONT 195 []
2 CONT 196 []
2 CONT 197 []
2 CONT 198 []
2 CONT 199 []
2 CONT 200 []
2 CONT 201 []
2 CONT 202 []
2 CONT 203 []
2 CONT 204 []
2 CONT 205 []
2 CONT 206 []
2 CONT 207 []
2 CONT 208 []
2 CONT 209 []
2 CONT 210 []
2 CONT 211 []
2 CONT 212 []
2 CONT 213 []
2 CONT 214 []
2 CONT 215 []
2 CONT 216 []
2 CONT 217 []
2 CONT 218 []
2 CONT 219 []
2 CONT 220 []
2 CONT 221 []
2 CONT 222 []
2 CONT 223 []
2 CONT 224 []
2 CONT 225 []
2 CONT 226 []
2 CONT 227 []
2 CONT 228 []
2 CONT 229 []
2 CONT 230 []
2 CONT 231 []
2 CONT 232 []
2 CONT 233 []
2 CONT 234 []
2 CONT 235 []
2 CONT 236 []
2 CONT 237 []
2 CONT 238 []
2 CONT 239 []
2 CONT 240 []
2 CONT 241 []
2 CONT 242 []
2 CONT 243 []
2 CONT 244 []
2 CONT 245 []
2 CONT 246 []
2 CONT 247 []
2 CONT 248 []
2 CONT 249 []
2 CONT 250 []
2 CONT 251 []
2 CONT 252 []
2 CONT 253 []
2 CONT 254 []
2 CONT 255 []
0 TRLR
\ No newline at end of file
diff --git a/data/tests/Latin_1_CR.gramps b/data/tests/Latin_1_CR.gramps
new file mode 100644
index 000000000..99d113d6c
--- /dev/null
+++ b/data/tests/Latin_1_CR.gramps
@@ -0,0 +1,235 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ Paul
+ Culley
+
+
+
+
+
+
+
+
+ Table of Latin_1, ISO-8859-1 characters
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/data/tests/Latin_1_CRLF.ged b/data/tests/Latin_1_CRLF.ged
new file mode 100644
index 000000000..119b0197a
--- /dev/null
+++ b/data/tests/Latin_1_CRLF.ged
@@ -0,0 +1,211 @@
+0 HEAD
+1 SOUR NOTEPAD++
+1 DEST ANY
+1 DATE 3 DEC 2010
+2 TIME 7:03
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 LANG English
+1 CHAR ASCII
+0 @I1@ INDI
+1 NAME Paul /Culley/
+1 SEX M
+1 BIRT
+2 DATE 1955
+1 DEAT
+2 DATE 2017
+1 NOTE
+2 CONT Table of Latin_1, ISO-8859-1 characters
+2 CONT 32 [ ]
+2 CONT 33 [!]
+2 CONT 34 ["]
+2 CONT 35 [#]
+2 CONT 36 [$]
+2 CONT 37 [%]
+2 CONT 38 [&]
+2 CONT 39 [']
+2 CONT 40 [(]
+2 CONT 41 [)]
+2 CONT 42 [*]
+2 CONT 43 [+]
+2 CONT 44 [,]
+2 CONT 45 [-]
+2 CONT 46 [.]
+2 CONT 47 [/]
+2 CONT 48 [0]
+2 CONT 49 [1]
+2 CONT 50 [2]
+2 CONT 51 [3]
+2 CONT 52 [4]
+2 CONT 53 [5]
+2 CONT 54 [6]
+2 CONT 55 [7]
+2 CONT 56 [8]
+2 CONT 57 [9]
+2 CONT 58 [:]
+2 CONT 59 [;]
+2 CONT 60 [<]
+2 CONT 61 [=]
+2 CONT 62 [>]
+2 CONT 63 [?]
+2 CONT 64 [@]
+2 CONT 65 [A]
+2 CONT 66 [B]
+2 CONT 67 [C]
+2 CONT 68 [D]
+2 CONT 69 [E]
+2 CONT 70 [F]
+2 CONT 71 [G]
+2 CONT 72 [H]
+2 CONT 73 [I]
+2 CONT 74 [J]
+2 CONT 75 [K]
+2 CONT 76 [L]
+2 CONT 77 [M]
+2 CONT 78 [N]
+2 CONT 79 [O]
+2 CONT 80 [P]
+2 CONT 81 [Q]
+2 CONT 82 [R]
+2 CONT 83 [S]
+2 CONT 84 [T]
+2 CONT 85 [U]
+2 CONT 86 [V]
+2 CONT 87 [W]
+2 CONT 88 [X]
+2 CONT 89 [Y]
+2 CONT 90 [Z]
+2 CONT 91 [[]
+2 CONT 92 [\]
+2 CONT 93 []]
+2 CONT 94 [^]
+2 CONT 95 [_]
+2 CONT 96 [`]
+2 CONT 97 [a]
+2 CONT 98 [b]
+2 CONT 99 [c]
+2 CONT 100 [d]
+2 CONT 101 [e]
+2 CONT 102 [f]
+2 CONT 103 [g]
+2 CONT 104 [h]
+2 CONT 105 [i]
+2 CONT 106 [j]
+2 CONT 107 [k]
+2 CONT 108 [l]
+2 CONT 109 [m]
+2 CONT 110 [n]
+2 CONT 111 [o]
+2 CONT 112 [p]
+2 CONT 113 [q]
+2 CONT 114 [r]
+2 CONT 115 [s]
+2 CONT 116 [t]
+2 CONT 117 [u]
+2 CONT 118 [v]
+2 CONT 119 [w]
+2 CONT 120 [x]
+2 CONT 121 [y]
+2 CONT 122 [z]
+2 CONT 123 [{]
+2 CONT 124 [|]
+2 CONT 125 [}]
+2 CONT 126 [~]
+2 CONT 160 []
+2 CONT 161 []
+2 CONT 162 []
+2 CONT 163 []
+2 CONT 164 []
+2 CONT 165 []
+2 CONT 166 []
+2 CONT 167 []
+2 CONT 168 []
+2 CONT 169 []
+2 CONT 170 []
+2 CONT 171 []
+2 CONT 172 []
+2 CONT 173 []
+2 CONT 174 []
+2 CONT 175 []
+2 CONT 176 []
+2 CONT 177 []
+2 CONT 178 []
+2 CONT 179 []
+2 CONT 180 []
+2 CONT 181 []
+2 CONT 182 []
+2 CONT 183 []
+2 CONT 184 []
+2 CONT 185 []
+2 CONT 186 []
+2 CONT 187 []
+2 CONT 188 []
+2 CONT 189 []
+2 CONT 190 []
+2 CONT 191 []
+2 CONT 192 []
+2 CONT 193 []
+2 CONT 194 []
+2 CONT 195 []
+2 CONT 196 []
+2 CONT 197 []
+2 CONT 198 []
+2 CONT 199 []
+2 CONT 200 []
+2 CONT 201 []
+2 CONT 202 []
+2 CONT 203 []
+2 CONT 204 []
+2 CONT 205 []
+2 CONT 206 []
+2 CONT 207 []
+2 CONT 208 []
+2 CONT 209 []
+2 CONT 210 []
+2 CONT 211 []
+2 CONT 212 []
+2 CONT 213 []
+2 CONT 214 []
+2 CONT 215 []
+2 CONT 216 []
+2 CONT 217 []
+2 CONT 218 []
+2 CONT 219 []
+2 CONT 220 []
+2 CONT 221 []
+2 CONT 222 []
+2 CONT 223 []
+2 CONT 224 []
+2 CONT 225 []
+2 CONT 226 []
+2 CONT 227 []
+2 CONT 228 []
+2 CONT 229 []
+2 CONT 230 []
+2 CONT 231 []
+2 CONT 232 []
+2 CONT 233 []
+2 CONT 234 []
+2 CONT 235 []
+2 CONT 236 []
+2 CONT 237 []
+2 CONT 238 []
+2 CONT 239 []
+2 CONT 240 []
+2 CONT 241 []
+2 CONT 242 []
+2 CONT 243 []
+2 CONT 244 []
+2 CONT 245 []
+2 CONT 246 []
+2 CONT 247 []
+2 CONT 248 []
+2 CONT 249 []
+2 CONT 250 []
+2 CONT 251 []
+2 CONT 252 []
+2 CONT 253 []
+2 CONT 254 []
+2 CONT 255 []
+0 TRLR
diff --git a/data/tests/Latin_1_CRLF.gramps b/data/tests/Latin_1_CRLF.gramps
new file mode 100644
index 000000000..99d113d6c
--- /dev/null
+++ b/data/tests/Latin_1_CRLF.gramps
@@ -0,0 +1,235 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ Paul
+ Culley
+
+
+
+
+
+
+
+
+ Table of Latin_1, ISO-8859-1 characters
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/data/tests/Latin_1_LF.ged b/data/tests/Latin_1_LF.ged
new file mode 100644
index 000000000..7b2c3c8f7
--- /dev/null
+++ b/data/tests/Latin_1_LF.ged
@@ -0,0 +1,211 @@
+0 HEAD
+1 SOUR NOTEPAD++
+1 DEST ANY
+1 DATE 3 DEC 2010
+2 TIME 7:03
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 LANG English
+1 CHAR ASCII
+0 @I1@ INDI
+1 NAME Paul /Culley/
+1 SEX M
+1 BIRT
+2 DATE 1955
+1 DEAT
+2 DATE 2017
+1 NOTE
+2 CONT Table of Latin_1, ISO-8859-1 characters
+2 CONT 32 [ ]
+2 CONT 33 [!]
+2 CONT 34 ["]
+2 CONT 35 [#]
+2 CONT 36 [$]
+2 CONT 37 [%]
+2 CONT 38 [&]
+2 CONT 39 [']
+2 CONT 40 [(]
+2 CONT 41 [)]
+2 CONT 42 [*]
+2 CONT 43 [+]
+2 CONT 44 [,]
+2 CONT 45 [-]
+2 CONT 46 [.]
+2 CONT 47 [/]
+2 CONT 48 [0]
+2 CONT 49 [1]
+2 CONT 50 [2]
+2 CONT 51 [3]
+2 CONT 52 [4]
+2 CONT 53 [5]
+2 CONT 54 [6]
+2 CONT 55 [7]
+2 CONT 56 [8]
+2 CONT 57 [9]
+2 CONT 58 [:]
+2 CONT 59 [;]
+2 CONT 60 [<]
+2 CONT 61 [=]
+2 CONT 62 [>]
+2 CONT 63 [?]
+2 CONT 64 [@]
+2 CONT 65 [A]
+2 CONT 66 [B]
+2 CONT 67 [C]
+2 CONT 68 [D]
+2 CONT 69 [E]
+2 CONT 70 [F]
+2 CONT 71 [G]
+2 CONT 72 [H]
+2 CONT 73 [I]
+2 CONT 74 [J]
+2 CONT 75 [K]
+2 CONT 76 [L]
+2 CONT 77 [M]
+2 CONT 78 [N]
+2 CONT 79 [O]
+2 CONT 80 [P]
+2 CONT 81 [Q]
+2 CONT 82 [R]
+2 CONT 83 [S]
+2 CONT 84 [T]
+2 CONT 85 [U]
+2 CONT 86 [V]
+2 CONT 87 [W]
+2 CONT 88 [X]
+2 CONT 89 [Y]
+2 CONT 90 [Z]
+2 CONT 91 [[]
+2 CONT 92 [\]
+2 CONT 93 []]
+2 CONT 94 [^]
+2 CONT 95 [_]
+2 CONT 96 [`]
+2 CONT 97 [a]
+2 CONT 98 [b]
+2 CONT 99 [c]
+2 CONT 100 [d]
+2 CONT 101 [e]
+2 CONT 102 [f]
+2 CONT 103 [g]
+2 CONT 104 [h]
+2 CONT 105 [i]
+2 CONT 106 [j]
+2 CONT 107 [k]
+2 CONT 108 [l]
+2 CONT 109 [m]
+2 CONT 110 [n]
+2 CONT 111 [o]
+2 CONT 112 [p]
+2 CONT 113 [q]
+2 CONT 114 [r]
+2 CONT 115 [s]
+2 CONT 116 [t]
+2 CONT 117 [u]
+2 CONT 118 [v]
+2 CONT 119 [w]
+2 CONT 120 [x]
+2 CONT 121 [y]
+2 CONT 122 [z]
+2 CONT 123 [{]
+2 CONT 124 [|]
+2 CONT 125 [}]
+2 CONT 126 [~]
+2 CONT 160 []
+2 CONT 161 []
+2 CONT 162 []
+2 CONT 163 []
+2 CONT 164 []
+2 CONT 165 []
+2 CONT 166 []
+2 CONT 167 []
+2 CONT 168 []
+2 CONT 169 []
+2 CONT 170 []
+2 CONT 171 []
+2 CONT 172 []
+2 CONT 173 []
+2 CONT 174 []
+2 CONT 175 []
+2 CONT 176 []
+2 CONT 177 []
+2 CONT 178 []
+2 CONT 179 []
+2 CONT 180 []
+2 CONT 181 []
+2 CONT 182 []
+2 CONT 183 []
+2 CONT 184 []
+2 CONT 185 []
+2 CONT 186 []
+2 CONT 187 []
+2 CONT 188 []
+2 CONT 189 []
+2 CONT 190 []
+2 CONT 191 []
+2 CONT 192 []
+2 CONT 193 []
+2 CONT 194 []
+2 CONT 195 []
+2 CONT 196 []
+2 CONT 197 []
+2 CONT 198 []
+2 CONT 199 []
+2 CONT 200 []
+2 CONT 201 []
+2 CONT 202 []
+2 CONT 203 []
+2 CONT 204 []
+2 CONT 205 []
+2 CONT 206 []
+2 CONT 207 []
+2 CONT 208 []
+2 CONT 209 []
+2 CONT 210 []
+2 CONT 211 []
+2 CONT 212 []
+2 CONT 213 []
+2 CONT 214 []
+2 CONT 215 []
+2 CONT 216 []
+2 CONT 217 []
+2 CONT 218 []
+2 CONT 219 []
+2 CONT 220 []
+2 CONT 221 []
+2 CONT 222 []
+2 CONT 223 []
+2 CONT 224 []
+2 CONT 225 []
+2 CONT 226 []
+2 CONT 227 []
+2 CONT 228 []
+2 CONT 229 []
+2 CONT 230 []
+2 CONT 231 []
+2 CONT 232 []
+2 CONT 233 []
+2 CONT 234 []
+2 CONT 235 []
+2 CONT 236 []
+2 CONT 237 []
+2 CONT 238 []
+2 CONT 239 []
+2 CONT 240 []
+2 CONT 241 []
+2 CONT 242 []
+2 CONT 243 []
+2 CONT 244 []
+2 CONT 245 []
+2 CONT 246 []
+2 CONT 247 []
+2 CONT 248 []
+2 CONT 249 []
+2 CONT 250 []
+2 CONT 251 []
+2 CONT 252 []
+2 CONT 253 []
+2 CONT 254 []
+2 CONT 255 []
+0 TRLR
diff --git a/data/tests/Latin_1_LF.gramps b/data/tests/Latin_1_LF.gramps
new file mode 100644
index 000000000..99d113d6c
--- /dev/null
+++ b/data/tests/Latin_1_LF.gramps
@@ -0,0 +1,235 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ Paul
+ Culley
+
+
+
+
+
+
+
+
+ Table of Latin_1, ISO-8859-1 characters
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/data/tests/UTF_16_BE_BOM_CR.GED b/data/tests/UTF_16_BE_BOM_CR.GED
new file mode 100644
index 000000000..0727a3662
Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_CR.GED differ
diff --git a/data/tests/UTF_16_BE_BOM_CR.gramps b/data/tests/UTF_16_BE_BOM_CR.gramps
new file mode 100644
index 000000000..a3c1557ba
--- /dev/null
+++ b/data/tests/UTF_16_BE_BOM_CR.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_16_BE_BOM_CRLF.GED b/data/tests/UTF_16_BE_BOM_CRLF.GED
new file mode 100644
index 000000000..27b4a3def
Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_CRLF.GED differ
diff --git a/data/tests/UTF_16_BE_BOM_CRLF.gramps b/data/tests/UTF_16_BE_BOM_CRLF.gramps
new file mode 100644
index 000000000..c7be9f2c9
--- /dev/null
+++ b/data/tests/UTF_16_BE_BOM_CRLF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_16_BE_BOM_LF.GED b/data/tests/UTF_16_BE_BOM_LF.GED
new file mode 100644
index 000000000..caca0ca38
Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_LF.GED differ
diff --git a/data/tests/UTF_16_BE_BOM_LF.gramps b/data/tests/UTF_16_BE_BOM_LF.gramps
new file mode 100644
index 000000000..530e0ff9b
--- /dev/null
+++ b/data/tests/UTF_16_BE_BOM_LF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_16_LE_BOM_CR.GED b/data/tests/UTF_16_LE_BOM_CR.GED
new file mode 100644
index 000000000..60dcf91ec
Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_CR.GED differ
diff --git a/data/tests/UTF_16_LE_BOM_CR.gramps b/data/tests/UTF_16_LE_BOM_CR.gramps
new file mode 100644
index 000000000..e9fe94f22
--- /dev/null
+++ b/data/tests/UTF_16_LE_BOM_CR.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_16_LE_BOM_CRLF.GED b/data/tests/UTF_16_LE_BOM_CRLF.GED
new file mode 100644
index 000000000..59e701e3d
Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_CRLF.GED differ
diff --git a/data/tests/UTF_16_LE_BOM_CRLF.gramps b/data/tests/UTF_16_LE_BOM_CRLF.gramps
new file mode 100644
index 000000000..1571e15c7
--- /dev/null
+++ b/data/tests/UTF_16_LE_BOM_CRLF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_16_LE_BOM_LF.GED b/data/tests/UTF_16_LE_BOM_LF.GED
new file mode 100644
index 000000000..62b6ce727
Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_LF.GED differ
diff --git a/data/tests/UTF_16_LE_BOM_LF.gramps b/data/tests/UTF_16_LE_BOM_LF.gramps
new file mode 100644
index 000000000..2718c107e
--- /dev/null
+++ b/data/tests/UTF_16_LE_BOM_LF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_BOM_CR.ged b/data/tests/UTF_8_BOM_CR.ged
new file mode 100644
index 000000000..5691e0fef
--- /dev/null
+++ b/data/tests/UTF_8_BOM_CR.ged
@@ -0,0 +1 @@
+0 HEAD
1 CHAR UTF-8
1 SOUR REGISTERED_SOURCE_NAME
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 NOTE UTF-8 transmission test.
2 CONT The transmission does start with a byte order mark (BOM)
2 CONT Each line is terminated using carriage return.
2 CONT This GEDCOM transmission contains a charcter set test. It consists
2 CONT of a single family (two parents, many children). The parents are used
2 CONT to test the cyrillic and greek letters. In both 'persons' the
2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
2 CONT small letters of alphabet.
2 CONT The children contain some combined letters and special charcters.
2 CONT The NAME tag of each 'person' is the name of the characters tested
2 CONT within the person.
2 CONT The first children contain some special characters. Here the strings
2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
2 CONT where 'character name'is the name of the character (like 'british pound')
2 CONT and 'test character' is a single byte representing this character
2 CONT in ANSEL.
2 CONT The last children contain some combined characters. The name tag gives
2 CONT the name of the non-spacing character tested within the 'person'.
2 CONT Within the name the hex-values of the non-spacing character is given
2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
2 CONT combined with the non-spacing character tested here and which have
2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
2 CONT without the non-spacing part.
2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
2 CONT tag contains all latin letters which have a UNICODE code point if
2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
2 CONT charcters combined with this ring.
2 CONT Note: Not all charcters can be displayed on all computers.
2 CONT This strongly depends on the installed fonts and codepages.
2 CONT This file based on the following source:
2 CONT www.unicode.org delivered the connection from the code point names
2 CONT to the actual values. Note, that much more UNICODE characters are
2 CONT possible (like the chinese alphabet).
1 SUBM @SUBMITTER@
1 DATE 20 JAN 1998
0 @SUBMITTER@ SUBM
1 NAME /H. Eichmann/
1 ADDR email: h.eichmann@@gmx.de
0 @FATHER@ INDI
1 NAME /cyrillic/
1 BIRT
2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
1 DEAT
2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
1 SEX M
1 FAMS @FAMILY@
0 @MOTHER@ INDI
1 NAME /greek/
1 BIRT
2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
1 DEAT
2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
1 SEX F
1 FAMS @FAMILY@
0 @CHILD0@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 0/
1 BIRT
2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
1 DEAT
2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
0 @CHILD1@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 1/
1 BIRT
2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
1 DEAT
2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
0 @CHILD2@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 2/
1 BIRT
2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
1 DEAT
2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
0 @CHILD3@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 3/
1 BIRT
2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
1 DEAT
2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
0 @CHILD4@ INDI
1 FAMC @FAMILY@
1 NAME code: 0309/HOOK ABOVE/
1 BIRT
2 PLAC AEIOU,Yaeio,uy
1 DEAT
2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
0 @CHILD5@ INDI
1 FAMC @FAMILY@
1 NAME code: 0300/GRAVE/
1 BIRT
2 PLAC AEIOU,WYaei,ouwy
1 DEAT
2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
0 @CHILD6@ INDI
1 FAMC @FAMILY@
1 NAME code: 0301/ACUTE/
1 BIRT
2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
1 DEAT
2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
0 @CHILD7@ INDI
1 FAMC @FAMILY@
1 NAME code: 0302/CIRCUMFLEX/
1 BIRT
2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
1 DEAT
2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
0 @CHILD8@ INDI
1 FAMC @FAMILY@
1 NAME code: 0303/TILDE/
1 BIRT
2 PLAC AEINO,UVYae,inouv,y
1 DEAT
2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
0 @CHILD9@ INDI
1 FAMC @FAMILY@
1 NAME code: 0304/MACRON/
1 BIRT
2 PLAC AEGIO,Uaegi,ou
1 DEAT
2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
0 @CHILD10@ INDI
1 FAMC @FAMILY@
1 NAME code: 0306/BREVE/
1 BIRT
2 PLAC AEGIO,Uaegi,ou
1 DEAT
2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
0 @CHILD11@ INDI
1 FAMC @FAMILY@
1 NAME code: 0307/DOT ABOVE/
1 BIRT
2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
1 DEAT
2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
0 @CHILD12@ INDI
1 FAMC @FAMILY@
1 NAME code: 0308/DIAERESIS/
1 BIRT
2 PLAC AEHIO,UWXYa,ehiot,uwxy
1 DEAT
2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
0 @CHILD13@ INDI
1 FAMC @FAMILY@
1 NAME code: 030C/CARON/
1 BIRT
2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
1 DEAT
2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
0 @CHILD14@ INDI
1 FAMC @FAMILY@
1 NAME code: 030A/RING ABOVE/
1 BIRT
2 PLAC AUauw,y
1 DEAT
2 PLAC ÅŮåůẘ,ẙ
0 @CHILD15@ INDI
1 FAMC @FAMILY@
1 NAME code: FE20/LIGATURE LEFT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD16@ INDI
1 FAMC @FAMILY@
1 NAME code: FE21/LIGATURE RIGHT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD17@ INDI
1 FAMC @FAMILY@
1 NAME code: 0315/COMMA ABOVE RIGHT/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD18@ INDI
1 FAMC @FAMILY@
1 NAME code: 030B/DOUBLE ACUTE/
1 BIRT
2 PLAC OUou
1 DEAT
2 PLAC ŐŰőű
0 @CHILD19@ INDI
1 FAMC @FAMILY@
1 NAME code: 0310/CANDRABINDU/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD20@ INDI
1 FAMC @FAMILY@
1 NAME code: 0327/CEDILLA/
1 BIRT
2 PLAC CDGHK,LNRST,cdghk,lnrst
1 DEAT
2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
0 @CHILD21@ INDI
1 FAMC @FAMILY@
1 NAME code: 0328/OGONEK/
1 BIRT
2 PLAC AEIOU,aeiou
1 DEAT
2 PLAC ĄĘĮǪŲ,ąęįǫų
0 @CHILD22@ INDI
1 FAMC @FAMILY@
1 NAME code: 0323/DOT BELOW/
1 BIRT
2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
1 DEAT
2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
0 @CHILD23@ INDI
1 FAMC @FAMILY@
1 NAME code: 0324/DIAERESIS BELOW/
1 BIRT
2 PLAC Uu
1 DEAT
2 PLAC Ṳṳ
0 @CHILD24@ INDI
1 FAMC @FAMILY@
1 NAME code: 0325/RING BELOW/
1 BIRT
2 PLAC Aa
1 DEAT
2 PLAC Ḁḁ
0 @CHILD25@ INDI
1 FAMC @FAMILY@
1 NAME code: 0333/DOUBLE LOW LINE/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD26@ INDI
1 FAMC @FAMILY@
1 NAME code: 0332/LINE BELOW/
1 BIRT
2 PLAC BDKLN,RTZbd,hklnr,tz
1 DEAT
2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
0 @CHILD27@ INDI
1 FAMC @FAMILY@
1 NAME code: 0326/COMMA BELOW/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD28@ INDI
1 FAMC @FAMILY@
1 NAME code: 031C/LEFT HALF RING BELOW/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD29@ INDI
1 FAMC @FAMILY@
1 NAME code: 032E/BREVE BELOW/
1 BIRT
2 PLAC Hh
1 DEAT
2 PLAC Ḫḫ
0 @CHILD30@ INDI
1 FAMC @FAMILY@
1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD31@ INDI
1 FAMC @FAMILY@
1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD32@ INDI
1 FAMC @FAMILY@
1 NAME code: 0313/COMMA ABOVE/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @FAMILY@ FAM
1 HUSB @FATHER@
1 WIFE @MOTHER@
1 CHIL @CHILD0@
1 CHIL @CHILD1@
1 CHIL @CHILD2@
1 CHIL @CHILD3@
1 CHIL @CHILD4@
1 CHIL @CHILD5@
1 CHIL @CHILD6@
1 CHIL @CHILD7@
1 CHIL @CHILD8@
1 CHIL @CHILD9@
1 CHIL @CHILD10@
1 CHIL @CHILD11@
1 CHIL @CHILD12@
1 CHIL @CHILD13@
1 CHIL @CHILD14@
1 CHIL @CHILD15@
1 CHIL @CHILD16@
1 CHIL @CHILD17@
1 CHIL @CHILD18@
1 CHIL @CHILD19@
1 CHIL @CHILD20@
1 CHIL @CHILD21@
1 CHIL @CHILD22@
1 CHIL @CHILD23@
1 CHIL @CHILD24@
1 CHIL @CHILD25@
1 CHIL @CHILD26@
1 CHIL @CHILD27@
1 CHIL @CHILD28@
1 CHIL @CHILD29@
1 CHIL @CHILD30@
1 CHIL @CHILD31@
1 CHIL @CHILD32@
0 TRLR
\ No newline at end of file
diff --git a/data/tests/UTF_8_BOM_CR.gramps b/data/tests/UTF_8_BOM_CR.gramps
new file mode 100644
index 000000000..f033f56b5
--- /dev/null
+++ b/data/tests/UTF_8_BOM_CR.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_BOM_CRLF.GED b/data/tests/UTF_8_BOM_CRLF.GED
new file mode 100644
index 000000000..2034e0ec8
--- /dev/null
+++ b/data/tests/UTF_8_BOM_CRLF.GED
@@ -0,0 +1,328 @@
+0 HEAD
+1 CHAR UTF-8
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE UTF-8 transmission test.
+2 CONT The transmission does start with a byte order mark (BOM)
+2 CONT Each line is terminated using carriage return + line feed.
+2 CONT This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are used
+2 CONT to test the cyrillic and greek letters. In both 'persons' the
+2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
+2 CONT small letters of alphabet.
+2 CONT The children contain some combined letters and special charcters.
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person.
+2 CONT The first children contain some special characters. Here the strings
+2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
+2 CONT where 'character name'is the name of the character (like 'british pound')
+2 CONT and 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain some combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
+2 CONT combined with the non-spacing character tested here and which have
+2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
+2 CONT without the non-spacing part.
+2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
+2 CONT tag contains all latin letters which have a UNICODE code point if
+2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
+2 CONT charcters combined with this ring.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT This file based on the following source:
+2 CONT www.unicode.org delivered the connection from the code point names
+2 CONT to the actual values. Note, that much more UNICODE characters are
+2 CONT possible (like the chinese alphabet).
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic/
+1 BIRT
+2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+1 DEAT
+2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek/
+1 BIRT
+2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+1 DEAT
+2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+1 DEAT
+2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+1 DEAT
+2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+1 DEAT
+2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+1 DEAT
+2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0309/HOOK ABOVE/
+1 BIRT
+2 PLAC AEIOU,Yaeio,uy
+1 DEAT
+2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0300/GRAVE/
+1 BIRT
+2 PLAC AEIOU,WYaei,ouwy
+1 DEAT
+2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0301/ACUTE/
+1 BIRT
+2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+1 DEAT
+2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0302/CIRCUMFLEX/
+1 BIRT
+2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
+1 DEAT
+2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0303/TILDE/
+1 BIRT
+2 PLAC AEINO,UVYae,inouv,y
+1 DEAT
+2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0304/MACRON/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0306/BREVE/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0307/DOT ABOVE/
+1 BIRT
+2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+1 DEAT
+2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0308/DIAERESIS/
+1 BIRT
+2 PLAC AEHIO,UWXYa,ehiot,uwxy
+1 DEAT
+2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030C/CARON/
+1 BIRT
+2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+1 DEAT
+2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030A/RING ABOVE/
+1 BIRT
+2 PLAC AUauw,y
+1 DEAT
+2 PLAC ÅŮåůẘ,ẙ
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE20/LIGATURE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE21/LIGATURE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0315/COMMA ABOVE RIGHT/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030B/DOUBLE ACUTE/
+1 BIRT
+2 PLAC OUou
+1 DEAT
+2 PLAC ŐŰőű
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0310/CANDRABINDU/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0327/CEDILLA/
+1 BIRT
+2 PLAC CDGHK,LNRST,cdghk,lnrst
+1 DEAT
+2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0328/OGONEK/
+1 BIRT
+2 PLAC AEIOU,aeiou
+1 DEAT
+2 PLAC ĄĘĮǪŲ,ąęįǫų
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0323/DOT BELOW/
+1 BIRT
+2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+1 DEAT
+2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0324/DIAERESIS BELOW/
+1 BIRT
+2 PLAC Uu
+1 DEAT
+2 PLAC Ṳṳ
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0325/RING BELOW/
+1 BIRT
+2 PLAC Aa
+1 DEAT
+2 PLAC Ḁḁ
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0333/DOUBLE LOW LINE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0332/LINE BELOW/
+1 BIRT
+2 PLAC BDKLN,RTZbd,hklnr,tz
+1 DEAT
+2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0326/COMMA BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 031C/LEFT HALF RING BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 032E/BREVE BELOW/
+1 BIRT
+2 PLAC Hh
+1 DEAT
+2 PLAC Ḫḫ
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0313/COMMA ABOVE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/UTF_8_BOM_CRLF.gramps b/data/tests/UTF_8_BOM_CRLF.gramps
new file mode 100644
index 000000000..4a49e4f9a
--- /dev/null
+++ b/data/tests/UTF_8_BOM_CRLF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_BOM_LF.GED b/data/tests/UTF_8_BOM_LF.GED
new file mode 100644
index 000000000..4ddb63104
--- /dev/null
+++ b/data/tests/UTF_8_BOM_LF.GED
@@ -0,0 +1,328 @@
+0 HEAD
+1 CHAR UTF-8
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE UTF-8 transmission test.
+2 CONT The transmission does start with a byte order mark (BOM)
+2 CONT Each line is terminated using line feed.
+2 CONT This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are used
+2 CONT to test the cyrillic and greek letters. In both 'persons' the
+2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
+2 CONT small letters of alphabet.
+2 CONT The children contain some combined letters and special charcters.
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person.
+2 CONT The first children contain some special characters. Here the strings
+2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
+2 CONT where 'character name'is the name of the character (like 'british pound')
+2 CONT and 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain some combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
+2 CONT combined with the non-spacing character tested here and which have
+2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
+2 CONT without the non-spacing part.
+2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
+2 CONT tag contains all latin letters which have a UNICODE code point if
+2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
+2 CONT charcters combined with this ring.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT This file based on the following source:
+2 CONT www.unicode.org delivered the connection from the code point names
+2 CONT to the actual values. Note, that much more UNICODE characters are
+2 CONT possible (like the chinese alphabet).
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic/
+1 BIRT
+2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+1 DEAT
+2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek/
+1 BIRT
+2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+1 DEAT
+2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+1 DEAT
+2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+1 DEAT
+2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+1 DEAT
+2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+1 DEAT
+2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0309/HOOK ABOVE/
+1 BIRT
+2 PLAC AEIOU,Yaeio,uy
+1 DEAT
+2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0300/GRAVE/
+1 BIRT
+2 PLAC AEIOU,WYaei,ouwy
+1 DEAT
+2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0301/ACUTE/
+1 BIRT
+2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+1 DEAT
+2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0302/CIRCUMFLEX/
+1 BIRT
+2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
+1 DEAT
+2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0303/TILDE/
+1 BIRT
+2 PLAC AEINO,UVYae,inouv,y
+1 DEAT
+2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0304/MACRON/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0306/BREVE/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0307/DOT ABOVE/
+1 BIRT
+2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+1 DEAT
+2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0308/DIAERESIS/
+1 BIRT
+2 PLAC AEHIO,UWXYa,ehiot,uwxy
+1 DEAT
+2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030C/CARON/
+1 BIRT
+2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+1 DEAT
+2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030A/RING ABOVE/
+1 BIRT
+2 PLAC AUauw,y
+1 DEAT
+2 PLAC ÅŮåůẘ,ẙ
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE20/LIGATURE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE21/LIGATURE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0315/COMMA ABOVE RIGHT/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030B/DOUBLE ACUTE/
+1 BIRT
+2 PLAC OUou
+1 DEAT
+2 PLAC ŐŰőű
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0310/CANDRABINDU/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0327/CEDILLA/
+1 BIRT
+2 PLAC CDGHK,LNRST,cdghk,lnrst
+1 DEAT
+2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0328/OGONEK/
+1 BIRT
+2 PLAC AEIOU,aeiou
+1 DEAT
+2 PLAC ĄĘĮǪŲ,ąęįǫų
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0323/DOT BELOW/
+1 BIRT
+2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+1 DEAT
+2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0324/DIAERESIS BELOW/
+1 BIRT
+2 PLAC Uu
+1 DEAT
+2 PLAC Ṳṳ
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0325/RING BELOW/
+1 BIRT
+2 PLAC Aa
+1 DEAT
+2 PLAC Ḁḁ
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0333/DOUBLE LOW LINE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0332/LINE BELOW/
+1 BIRT
+2 PLAC BDKLN,RTZbd,hklnr,tz
+1 DEAT
+2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0326/COMMA BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 031C/LEFT HALF RING BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 032E/BREVE BELOW/
+1 BIRT
+2 PLAC Hh
+1 DEAT
+2 PLAC Ḫḫ
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0313/COMMA ABOVE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/UTF_8_BOM_LF.gramps b/data/tests/UTF_8_BOM_LF.gramps
new file mode 100644
index 000000000..f37368e37
--- /dev/null
+++ b/data/tests/UTF_8_BOM_LF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_NOBOM_CR.GED b/data/tests/UTF_8_NOBOM_CR.GED
new file mode 100644
index 000000000..fa788206f
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_CR.GED
@@ -0,0 +1 @@
+0 HEAD
1 CHAR UTF-8
1 SOUR REGISTERED_SOURCE_NAME
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 NOTE UTF-8 transmission test.
2 CONT The transmission does NOT start with a byte order mark (BOM)
2 CONT Each line is terminated using carriage return.
2 CONT This GEDCOM transmission contains a charcter set test. It consists
2 CONT of a single family (two parents, many children). The parents are used
2 CONT to test the cyrillic and greek letters. In both 'persons' the
2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
2 CONT small letters of alphabet.
2 CONT The children contain some combined letters and special charcters.
2 CONT The NAME tag of each 'person' is the name of the characters tested
2 CONT within the person.
2 CONT The first children contain some special characters. Here the strings
2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
2 CONT where 'character name'is the name of the character (like 'british pound')
2 CONT and 'test character' is a single byte representing this character
2 CONT in ANSEL.
2 CONT The last children contain some combined characters. The name tag gives
2 CONT the name of the non-spacing character tested within the 'person'.
2 CONT Within the name the hex-values of the non-spacing character is given
2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
2 CONT combined with the non-spacing character tested here and which have
2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
2 CONT without the non-spacing part.
2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
2 CONT tag contains all latin letters which have a UNICODE code point if
2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
2 CONT charcters combined with this ring.
2 CONT Note: Not all charcters can be displayed on all computers.
2 CONT This strongly depends on the installed fonts and codepages.
2 CONT This file based on the following source:
2 CONT www.unicode.org delivered the connection from the code point names
2 CONT to the actual values. Note, that much more UNICODE characters are
2 CONT possible (like the chinese alphabet).
1 SUBM @SUBMITTER@
1 DATE 20 JAN 1998
0 @SUBMITTER@ SUBM
1 NAME /H. Eichmann/
1 ADDR email: h.eichmann@@gmx.de
0 @FATHER@ INDI
1 NAME /cyrillic/
1 BIRT
2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
1 DEAT
2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
1 SEX M
1 FAMS @FAMILY@
0 @MOTHER@ INDI
1 NAME /greek/
1 BIRT
2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
1 DEAT
2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
1 SEX F
1 FAMS @FAMILY@
0 @CHILD0@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 0/
1 BIRT
2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
1 DEAT
2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
0 @CHILD1@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 1/
1 BIRT
2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
1 DEAT
2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
0 @CHILD2@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 2/
1 BIRT
2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
1 DEAT
2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
0 @CHILD3@ INDI
1 FAMC @FAMILY@
1 NAME /Special Characters 3/
1 BIRT
2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
1 DEAT
2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
0 @CHILD4@ INDI
1 FAMC @FAMILY@
1 NAME code: 0309/HOOK ABOVE/
1 BIRT
2 PLAC AEIOU,Yaeio,uy
1 DEAT
2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
0 @CHILD5@ INDI
1 FAMC @FAMILY@
1 NAME code: 0300/GRAVE/
1 BIRT
2 PLAC AEIOU,WYaei,ouwy
1 DEAT
2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
0 @CHILD6@ INDI
1 FAMC @FAMILY@
1 NAME code: 0301/ACUTE/
1 BIRT
2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
1 DEAT
2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
0 @CHILD7@ INDI
1 FAMC @FAMILY@
1 NAME code: 0302/CIRCUMFLEX/
1 BIRT
2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
1 DEAT
2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
0 @CHILD8@ INDI
1 FAMC @FAMILY@
1 NAME code: 0303/TILDE/
1 BIRT
2 PLAC AEINO,UVYae,inouv,y
1 DEAT
2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
0 @CHILD9@ INDI
1 FAMC @FAMILY@
1 NAME code: 0304/MACRON/
1 BIRT
2 PLAC AEGIO,Uaegi,ou
1 DEAT
2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
0 @CHILD10@ INDI
1 FAMC @FAMILY@
1 NAME code: 0306/BREVE/
1 BIRT
2 PLAC AEGIO,Uaegi,ou
1 DEAT
2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
0 @CHILD11@ INDI
1 FAMC @FAMILY@
1 NAME code: 0307/DOT ABOVE/
1 BIRT
2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
1 DEAT
2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
0 @CHILD12@ INDI
1 FAMC @FAMILY@
1 NAME code: 0308/DIAERESIS/
1 BIRT
2 PLAC AEHIO,UWXYa,ehiot,uwxy
1 DEAT
2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
0 @CHILD13@ INDI
1 FAMC @FAMILY@
1 NAME code: 030C/CARON/
1 BIRT
2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
1 DEAT
2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
0 @CHILD14@ INDI
1 FAMC @FAMILY@
1 NAME code: 030A/RING ABOVE/
1 BIRT
2 PLAC AUauw,y
1 DEAT
2 PLAC ÅŮåůẘ,ẙ
0 @CHILD15@ INDI
1 FAMC @FAMILY@
1 NAME code: FE20/LIGATURE LEFT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD16@ INDI
1 FAMC @FAMILY@
1 NAME code: FE21/LIGATURE RIGHT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD17@ INDI
1 FAMC @FAMILY@
1 NAME code: 0315/COMMA ABOVE RIGHT/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD18@ INDI
1 FAMC @FAMILY@
1 NAME code: 030B/DOUBLE ACUTE/
1 BIRT
2 PLAC OUou
1 DEAT
2 PLAC ŐŰőű
0 @CHILD19@ INDI
1 FAMC @FAMILY@
1 NAME code: 0310/CANDRABINDU/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD20@ INDI
1 FAMC @FAMILY@
1 NAME code: 0327/CEDILLA/
1 BIRT
2 PLAC CDGHK,LNRST,cdghk,lnrst
1 DEAT
2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
0 @CHILD21@ INDI
1 FAMC @FAMILY@
1 NAME code: 0328/OGONEK/
1 BIRT
2 PLAC AEIOU,aeiou
1 DEAT
2 PLAC ĄĘĮǪŲ,ąęįǫų
0 @CHILD22@ INDI
1 FAMC @FAMILY@
1 NAME code: 0323/DOT BELOW/
1 BIRT
2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
1 DEAT
2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
0 @CHILD23@ INDI
1 FAMC @FAMILY@
1 NAME code: 0324/DIAERESIS BELOW/
1 BIRT
2 PLAC Uu
1 DEAT
2 PLAC Ṳṳ
0 @CHILD24@ INDI
1 FAMC @FAMILY@
1 NAME code: 0325/RING BELOW/
1 BIRT
2 PLAC Aa
1 DEAT
2 PLAC Ḁḁ
0 @CHILD25@ INDI
1 FAMC @FAMILY@
1 NAME code: 0333/DOUBLE LOW LINE/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD26@ INDI
1 FAMC @FAMILY@
1 NAME code: 0332/LINE BELOW/
1 BIRT
2 PLAC BDKLN,RTZbd,hklnr,tz
1 DEAT
2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
0 @CHILD27@ INDI
1 FAMC @FAMILY@
1 NAME code: 0326/COMMA BELOW/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD28@ INDI
1 FAMC @FAMILY@
1 NAME code: 031C/LEFT HALF RING BELOW/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD29@ INDI
1 FAMC @FAMILY@
1 NAME code: 032E/BREVE BELOW/
1 BIRT
2 PLAC Hh
1 DEAT
2 PLAC Ḫḫ
0 @CHILD30@ INDI
1 FAMC @FAMILY@
1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD31@ INDI
1 FAMC @FAMILY@
1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @CHILD32@ INDI
1 FAMC @FAMILY@
1 NAME code: 0313/COMMA ABOVE/
1 BIRT
2 PLAC - none -
1 DEAT
2 PLAC - none -
0 @FAMILY@ FAM
1 HUSB @FATHER@
1 WIFE @MOTHER@
1 CHIL @CHILD0@
1 CHIL @CHILD1@
1 CHIL @CHILD2@
1 CHIL @CHILD3@
1 CHIL @CHILD4@
1 CHIL @CHILD5@
1 CHIL @CHILD6@
1 CHIL @CHILD7@
1 CHIL @CHILD8@
1 CHIL @CHILD9@
1 CHIL @CHILD10@
1 CHIL @CHILD11@
1 CHIL @CHILD12@
1 CHIL @CHILD13@
1 CHIL @CHILD14@
1 CHIL @CHILD15@
1 CHIL @CHILD16@
1 CHIL @CHILD17@
1 CHIL @CHILD18@
1 CHIL @CHILD19@
1 CHIL @CHILD20@
1 CHIL @CHILD21@
1 CHIL @CHILD22@
1 CHIL @CHILD23@
1 CHIL @CHILD24@
1 CHIL @CHILD25@
1 CHIL @CHILD26@
1 CHIL @CHILD27@
1 CHIL @CHILD28@
1 CHIL @CHILD29@
1 CHIL @CHILD30@
1 CHIL @CHILD31@
1 CHIL @CHILD32@
0 TRLR
\ No newline at end of file
diff --git a/data/tests/UTF_8_NOBOM_CR.gramps b/data/tests/UTF_8_NOBOM_CR.gramps
new file mode 100644
index 000000000..64540e707
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_CR.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_NOBOM_CRLF.GED b/data/tests/UTF_8_NOBOM_CRLF.GED
new file mode 100644
index 000000000..747d59f1e
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_CRLF.GED
@@ -0,0 +1,328 @@
+0 HEAD
+1 CHAR UTF-8
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE UTF-8 transmission test.
+2 CONT The transmission does NOT start with a byte order mark (BOM)
+2 CONT Each line is terminated using carriage return + line feed.
+2 CONT This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are used
+2 CONT to test the cyrillic and greek letters. In both 'persons' the
+2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
+2 CONT small letters of alphabet.
+2 CONT The children contain some combined letters and special charcters.
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person.
+2 CONT The first children contain some special characters. Here the strings
+2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
+2 CONT where 'character name'is the name of the character (like 'british pound')
+2 CONT and 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain some combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
+2 CONT combined with the non-spacing character tested here and which have
+2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
+2 CONT without the non-spacing part.
+2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
+2 CONT tag contains all latin letters which have a UNICODE code point if
+2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
+2 CONT charcters combined with this ring.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT This file based on the following source:
+2 CONT www.unicode.org delivered the connection from the code point names
+2 CONT to the actual values. Note, that much more UNICODE characters are
+2 CONT possible (like the chinese alphabet).
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic/
+1 BIRT
+2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+1 DEAT
+2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek/
+1 BIRT
+2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+1 DEAT
+2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+1 DEAT
+2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+1 DEAT
+2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+1 DEAT
+2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+1 DEAT
+2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0309/HOOK ABOVE/
+1 BIRT
+2 PLAC AEIOU,Yaeio,uy
+1 DEAT
+2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0300/GRAVE/
+1 BIRT
+2 PLAC AEIOU,WYaei,ouwy
+1 DEAT
+2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0301/ACUTE/
+1 BIRT
+2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+1 DEAT
+2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0302/CIRCUMFLEX/
+1 BIRT
+2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
+1 DEAT
+2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0303/TILDE/
+1 BIRT
+2 PLAC AEINO,UVYae,inouv,y
+1 DEAT
+2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0304/MACRON/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0306/BREVE/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0307/DOT ABOVE/
+1 BIRT
+2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+1 DEAT
+2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0308/DIAERESIS/
+1 BIRT
+2 PLAC AEHIO,UWXYa,ehiot,uwxy
+1 DEAT
+2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030C/CARON/
+1 BIRT
+2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+1 DEAT
+2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030A/RING ABOVE/
+1 BIRT
+2 PLAC AUauw,y
+1 DEAT
+2 PLAC ÅŮåůẘ,ẙ
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE20/LIGATURE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE21/LIGATURE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0315/COMMA ABOVE RIGHT/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030B/DOUBLE ACUTE/
+1 BIRT
+2 PLAC OUou
+1 DEAT
+2 PLAC ŐŰőű
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0310/CANDRABINDU/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0327/CEDILLA/
+1 BIRT
+2 PLAC CDGHK,LNRST,cdghk,lnrst
+1 DEAT
+2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0328/OGONEK/
+1 BIRT
+2 PLAC AEIOU,aeiou
+1 DEAT
+2 PLAC ĄĘĮǪŲ,ąęįǫų
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0323/DOT BELOW/
+1 BIRT
+2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+1 DEAT
+2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0324/DIAERESIS BELOW/
+1 BIRT
+2 PLAC Uu
+1 DEAT
+2 PLAC Ṳṳ
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0325/RING BELOW/
+1 BIRT
+2 PLAC Aa
+1 DEAT
+2 PLAC Ḁḁ
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0333/DOUBLE LOW LINE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0332/LINE BELOW/
+1 BIRT
+2 PLAC BDKLN,RTZbd,hklnr,tz
+1 DEAT
+2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0326/COMMA BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 031C/LEFT HALF RING BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 032E/BREVE BELOW/
+1 BIRT
+2 PLAC Hh
+1 DEAT
+2 PLAC Ḫḫ
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0313/COMMA ABOVE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/UTF_8_NOBOM_CRLF.gramps b/data/tests/UTF_8_NOBOM_CRLF.gramps
new file mode 100644
index 000000000..c7e30b86e
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_CRLF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/UTF_8_NOBOM_LF.GED b/data/tests/UTF_8_NOBOM_LF.GED
new file mode 100644
index 000000000..d95032c4d
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_LF.GED
@@ -0,0 +1,328 @@
+0 HEAD
+1 CHAR UTF-8
+1 SOUR REGISTERED_SOURCE_NAME
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 NOTE UTF-8 transmission test.
+2 CONT The transmission does NOT start with a byte order mark (BOM)
+2 CONT Each line is terminated using line feed.
+2 CONT This GEDCOM transmission contains a charcter set test. It consists
+2 CONT of a single family (two parents, many children). The parents are used
+2 CONT to test the cyrillic and greek letters. In both 'persons' the
+2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some
+2 CONT small letters of alphabet.
+2 CONT The children contain some combined letters and special charcters.
+2 CONT The NAME tag of each 'person' is the name of the characters tested
+2 CONT within the person.
+2 CONT The first children contain some special characters. Here the strings
+2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...'
+2 CONT where 'character name'is the name of the character (like 'british pound')
+2 CONT and 'test character' is a single byte representing this character
+2 CONT in ANSEL.
+2 CONT The last children contain some combined characters. The name tag gives
+2 CONT the name of the non-spacing character tested within the 'person'.
+2 CONT Within the name the hex-values of the non-spacing character is given
+2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are
+2 CONT combined with the non-spacing character tested here and which have
+2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters
+2 CONT without the non-spacing part.
+2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC
+2 CONT tag contains all latin letters which have a UNICODE code point if
+2 CONT combined with a ring above. The DEAT.PLAC tag contain the same
+2 CONT charcters combined with this ring.
+2 CONT Note: Not all charcters can be displayed on all computers.
+2 CONT This strongly depends on the installed fonts and codepages.
+2 CONT This file based on the following source:
+2 CONT www.unicode.org delivered the connection from the code point names
+2 CONT to the actual values. Note, that much more UNICODE characters are
+2 CONT possible (like the chinese alphabet).
+1 SUBM @SUBMITTER@
+1 DATE 20 JAN 1998
+0 @SUBMITTER@ SUBM
+1 NAME /H. Eichmann/
+1 ADDR email: h.eichmann@@gmx.de
+0 @FATHER@ INDI
+1 NAME /cyrillic/
+1 BIRT
+2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+1 DEAT
+2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя
+1 SEX M
+1 FAMS @FAMILY@
+0 @MOTHER@ INDI
+1 NAME /greek/
+1 BIRT
+2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+1 DEAT
+2 PLAC αβγδεζηθικλμνξοπρςστυφχψω
+1 SEX F
+1 FAMS @FAMILY@
+0 @CHILD0@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 0/
+1 BIRT
+2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+1 DEAT
+2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+0 @CHILD1@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 1/
+1 BIRT
+2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+1 DEAT
+2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+0 @CHILD2@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 2/
+1 BIRT
+2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+1 DEAT
+2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+0 @CHILD3@ INDI
+1 FAMC @FAMILY@
+1 NAME /Special Characters 3/
+1 BIRT
+2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+1 DEAT
+2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+0 @CHILD4@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0309/HOOK ABOVE/
+1 BIRT
+2 PLAC AEIOU,Yaeio,uy
+1 DEAT
+2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+0 @CHILD5@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0300/GRAVE/
+1 BIRT
+2 PLAC AEIOU,WYaei,ouwy
+1 DEAT
+2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+0 @CHILD6@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0301/ACUTE/
+1 BIRT
+2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+1 DEAT
+2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+0 @CHILD7@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0302/CIRCUMFLEX/
+1 BIRT
+2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z
+1 DEAT
+2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+0 @CHILD8@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0303/TILDE/
+1 BIRT
+2 PLAC AEINO,UVYae,inouv,y
+1 DEAT
+2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+0 @CHILD9@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0304/MACRON/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū
+0 @CHILD10@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0306/BREVE/
+1 BIRT
+2 PLAC AEGIO,Uaegi,ou
+1 DEAT
+2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+0 @CHILD11@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0307/DOT ABOVE/
+1 BIRT
+2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+1 DEAT
+2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+0 @CHILD12@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0308/DIAERESIS/
+1 BIRT
+2 PLAC AEHIO,UWXYa,ehiot,uwxy
+1 DEAT
+2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+0 @CHILD13@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030C/CARON/
+1 BIRT
+2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+1 DEAT
+2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+0 @CHILD14@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030A/RING ABOVE/
+1 BIRT
+2 PLAC AUauw,y
+1 DEAT
+2 PLAC ÅŮåůẘ,ẙ
+0 @CHILD15@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE20/LIGATURE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD16@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE21/LIGATURE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD17@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0315/COMMA ABOVE RIGHT/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD18@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 030B/DOUBLE ACUTE/
+1 BIRT
+2 PLAC OUou
+1 DEAT
+2 PLAC ŐŰőű
+0 @CHILD19@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0310/CANDRABINDU/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD20@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0327/CEDILLA/
+1 BIRT
+2 PLAC CDGHK,LNRST,cdghk,lnrst
+1 DEAT
+2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+0 @CHILD21@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0328/OGONEK/
+1 BIRT
+2 PLAC AEIOU,aeiou
+1 DEAT
+2 PLAC ĄĘĮǪŲ,ąęįǫų
+0 @CHILD22@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0323/DOT BELOW/
+1 BIRT
+2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+1 DEAT
+2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+0 @CHILD23@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0324/DIAERESIS BELOW/
+1 BIRT
+2 PLAC Uu
+1 DEAT
+2 PLAC Ṳṳ
+0 @CHILD24@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0325/RING BELOW/
+1 BIRT
+2 PLAC Aa
+1 DEAT
+2 PLAC Ḁḁ
+0 @CHILD25@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0333/DOUBLE LOW LINE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD26@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0332/LINE BELOW/
+1 BIRT
+2 PLAC BDKLN,RTZbd,hklnr,tz
+1 DEAT
+2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+0 @CHILD27@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0326/COMMA BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD28@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 031C/LEFT HALF RING BELOW/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD29@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 032E/BREVE BELOW/
+1 BIRT
+2 PLAC Hh
+1 DEAT
+2 PLAC Ḫḫ
+0 @CHILD30@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE22/DOUBLE TILDE LEFT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD31@ INDI
+1 FAMC @FAMILY@
+1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @CHILD32@ INDI
+1 FAMC @FAMILY@
+1 NAME code: 0313/COMMA ABOVE/
+1 BIRT
+2 PLAC - none -
+1 DEAT
+2 PLAC - none -
+0 @FAMILY@ FAM
+1 HUSB @FATHER@
+1 WIFE @MOTHER@
+1 CHIL @CHILD0@
+1 CHIL @CHILD1@
+1 CHIL @CHILD2@
+1 CHIL @CHILD3@
+1 CHIL @CHILD4@
+1 CHIL @CHILD5@
+1 CHIL @CHILD6@
+1 CHIL @CHILD7@
+1 CHIL @CHILD8@
+1 CHIL @CHILD9@
+1 CHIL @CHILD10@
+1 CHIL @CHILD11@
+1 CHIL @CHILD12@
+1 CHIL @CHILD13@
+1 CHIL @CHILD14@
+1 CHIL @CHILD15@
+1 CHIL @CHILD16@
+1 CHIL @CHILD17@
+1 CHIL @CHILD18@
+1 CHIL @CHILD19@
+1 CHIL @CHILD20@
+1 CHIL @CHILD21@
+1 CHIL @CHILD22@
+1 CHIL @CHILD23@
+1 CHIL @CHILD24@
+1 CHIL @CHILD25@
+1 CHIL @CHILD26@
+1 CHIL @CHILD27@
+1 CHIL @CHILD28@
+1 CHIL @CHILD29@
+1 CHIL @CHILD30@
+1 CHIL @CHILD31@
+1 CHIL @CHILD32@
+0 TRLR
diff --git a/data/tests/UTF_8_NOBOM_LF.gramps b/data/tests/UTF_8_NOBOM_LF.gramps
new file mode 100644
index 000000000..e2d9da800
--- /dev/null
+++ b/data/tests/UTF_8_NOBOM_LF.gramps
@@ -0,0 +1,882 @@
+
+
+
+
+
+
+ /H. Eichmann/
+ email: h.eichmann@@gmx.de
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ M
+
+ cyrillic
+
+
+
+
+
+
+ F
+
+ greek
+
+
+
+
+
+
+ U
+
+ Special Characters 0
+
+
+
+
+
+
+ U
+
+ Special Characters 1
+
+
+
+
+
+
+ U
+
+ Special Characters 2
+
+
+
+
+
+
+ U
+
+ Special Characters 3
+
+
+
+
+
+
+ U
+
+ code: 0309
+ HOOK ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0300
+ GRAVE
+
+
+
+
+
+
+ U
+
+ code: 0301
+ ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0302
+ CIRCUMFLEX
+
+
+
+
+
+
+ U
+
+ code: 0303
+ TILDE
+
+
+
+
+
+
+ U
+
+ code: 0304
+ MACRON
+
+
+
+
+
+
+ U
+
+ code: 0306
+ BREVE
+
+
+
+
+
+
+ U
+
+ code: 0307
+ DOT ABOVE
+
+
+
+
+
+
+ U
+
+ code: 0308
+ DIAERESIS
+
+
+
+
+
+
+ U
+
+ code: 030C
+ CARON
+
+
+
+
+
+
+ U
+
+ code: 030A
+ RING ABOVE
+
+
+
+
+
+
+ U
+
+ code: FE20
+ LIGATURE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE21
+ LIGATURE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0315
+ COMMA ABOVE RIGHT
+
+
+
+
+
+
+ U
+
+ code: 030B
+ DOUBLE ACUTE
+
+
+
+
+
+
+ U
+
+ code: 0310
+ CANDRABINDU
+
+
+
+
+
+
+ U
+
+ code: 0327
+ CEDILLA
+
+
+
+
+
+
+ U
+
+ code: 0328
+ OGONEK
+
+
+
+
+
+
+ U
+
+ code: 0323
+ DOT BELOW
+
+
+
+
+
+
+ U
+
+ code: 0324
+ DIAERESIS BELOW
+
+
+
+
+
+
+ U
+
+ code: 0325
+ RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 0333
+ DOUBLE LOW LINE
+
+
+
+
+
+
+ U
+
+ code: 0332
+ LINE BELOW
+
+
+
+
+
+
+ U
+
+ code: 0326
+ COMMA BELOW
+
+
+
+
+
+
+ U
+
+ code: 031C
+ LEFT HALF RING BELOW
+
+
+
+
+
+
+ U
+
+ code: 032E
+ BREVE BELOW
+
+
+
+
+
+
+ U
+
+ code: FE22
+ DOUBLE TILDE LEFT HALF
+
+
+
+
+
+
+ U
+
+ code: FE23
+ DOUBLE TILDE RIGHT HALF
+
+
+
+
+
+
+ U
+
+ code: 0313
+ COMMA ABOVE
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
+
+
+
+ абвгдежзийклмнопрстуфхцчшщъыьэюя
+
+
+
+ ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ
+
+
+
+ αβγδεζηθικλμνξοπρςστυφχψω
+
+
+
+ capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ)
+
+
+
+ capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭)
+
+
+
+ registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư)
+
+
+
+ modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ)
+
+
+
+ small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ)
+
+
+
+ small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư)
+
+
+
+ degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©)
+
+
+
+ music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß)
+
+
+
+ AEIOU,Yaeio,uy
+
+
+
+ ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ
+
+
+
+ AEIOU,WYaei,ouwy
+
+
+
+ ÀÈÌÒÙ,ẀỲàèì,òùẁỳ
+
+
+
+ ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz
+
+
+
+ ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź
+
+
+
+ ACEGH,IJOSU,WYZac,eghij,osuwy,z
+
+
+
+ ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ
+
+
+
+ AEINO,UVYae,inouv,y
+
+
+
+ ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ
+
+
+
+ AEGIO,Uaegi,ou
+
+
+
+ ĀĒḠĪŌ,Ūāēḡī,ōū
+
+
+
+ ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ
+
+
+
+ BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz
+
+
+
+ ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż
+
+
+
+ AEHIO,UWXYa,ehiot,uwxy
+
+
+
+ ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ
+
+
+
+ ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z
+
+
+
+ ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž
+
+
+
+ AUauw,y
+
+
+
+ ÅŮåůẘ,ẙ
+
+
+
+ - none -
+
+
+
+ OUou
+
+
+
+ ŐŰőű
+
+
+
+ CDGHK,LNRST,cdghk,lnrst
+
+
+
+ ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ
+
+
+
+ AEIOU,aeiou
+
+
+
+ ĄĘĮǪŲ,ąęįǫų
+
+
+
+ ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz
+
+
+
+ ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ
+
+
+
+ Uu
+
+
+
+ Ṳṳ
+
+
+
+ Aa
+
+
+
+ Ḁḁ
+
+
+
+ BDKLN,RTZbd,hklnr,tz
+
+
+
+ ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ
+
+
+
+ Hh
+
+
+
+ Ḫḫ
+
+
+
+
diff --git a/data/tests/cp1252_CR.ged b/data/tests/cp1252_CR.ged
new file mode 100644
index 000000000..f7d054e9f
--- /dev/null
+++ b/data/tests/cp1252_CR.ged
@@ -0,0 +1 @@
+0 HEAD
1 SOUR LIFELINES 3.0.62
1 DEST ANY
1 DATE 3 DEC 2010
2 TIME 7:03
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 LANG French
1 CHAR cp1252
0 @I1@ INDI
1 NAME Jean /Thomas/
1 SEX F
1 BIRT
2 DATE 1830
1 DEAT
2 DATE 1904
1 NOTE
2 CONT Table de caractres Windows cp1252
2 CONT 32 [ ]
2 CONT 33 [!]
2 CONT 34 ["]
2 CONT 35 [#]
2 CONT 36 [$]
2 CONT 37 [%]
2 CONT 38 [&]
2 CONT 39 [']
2 CONT 40 [(]
2 CONT 41 [)]
2 CONT 42 [*]
2 CONT 43 [+]
2 CONT 44 [,]
2 CONT 45 [-]
2 CONT 46 [.]
2 CONT 47 [/]
2 CONT 48 [0]
2 CONT 49 [1]
2 CONT 50 [2]
2 CONT 51 [3]
2 CONT 52 [4]
2 CONT 53 [5]
2 CONT 54 [6]
2 CONT 55 [7]
2 CONT 56 [8]
2 CONT 57 [9]
2 CONT 58 [:]
2 CONT 59 [;]
2 CONT 60 [<]
2 CONT 61 [=]
2 CONT 62 [>]
2 CONT 63 [?]
2 CONT 64 [@]
2 CONT 65 [A]
2 CONT 66 [B]
2 CONT 67 [C]
2 CONT 68 [D]
2 CONT 69 [E]
2 CONT 70 [F]
2 CONT 71 [G]
2 CONT 72 [H]
2 CONT 73 [I]
2 CONT 74 [J]
2 CONT 75 [K]
2 CONT 76 [L]
2 CONT 77 [M]
2 CONT 78 [N]
2 CONT 79 [O]
2 CONT 80 [P]
2 CONT 81 [Q]
2 CONT 82 [R]
2 CONT 83 [S]
2 CONT 84 [T]
2 CONT 85 [U]
2 CONT 86 [V]
2 CONT 87 [W]
2 CONT 88 [X]
2 CONT 89 [Y]
2 CONT 90 [Z]
2 CONT 91 [[]
2 CONT 92 [\]
2 CONT 93 []]
2 CONT 94 [^]
2 CONT 95 [_]
2 CONT 96 [`]
2 CONT 97 [a]
2 CONT 98 [b]
2 CONT 99 [c]
2 CONT 100 [d]
2 CONT 101 [e]
2 CONT 102 [f]
2 CONT 103 [g]
2 CONT 104 [h]
2 CONT 105 [i]
2 CONT 106 [j]
2 CONT 107 [k]
2 CONT 108 [l]
2 CONT 109 [m]
2 CONT 110 [n]
2 CONT 111 [o]
2 CONT 112 [p]
2 CONT 113 [q]
2 CONT 114 [r]
2 CONT 115 [s]
2 CONT 116 [t]
2 CONT 117 [u]
2 CONT 118 [v]
2 CONT 119 [w]
2 CONT 120 [x]
2 CONT 121 [y]
2 CONT 122 [z]
2 CONT 123 [{]
2 CONT 124 [|]
2 CONT 125 [}]
2 CONT 126 [~]
2 CONT 128 []
2 CONT 130 []
2 CONT 131 []
2 CONT 132 []
2 CONT 133 []
2 CONT 134 []
2 CONT 135 []
2 CONT 136 []
2 CONT 137 []
2 CONT 138 []
2 CONT 139 []
2 CONT 140 []
2 CONT 145 []
2 CONT 146 []
2 CONT 147 []
2 CONT 148 []
2 CONT 149 []
2 CONT 150 []
2 CONT 151 []
2 CONT 152 []
2 CONT 153 []
2 CONT 154 []
2 CONT 155 []
2 CONT 156 []
2 CONT 159 []
2 CONT 160 []
2 CONT 161 []
2 CONT 162 []
2 CONT 163 []
2 CONT 164 []
2 CONT 165 []
2 CONT 166 []
2 CONT 167 []
2 CONT 168 []
2 CONT 169 []
2 CONT 170 []
2 CONT 171 []
2 CONT 172 []
2 CONT 173 []
2 CONT 174 []
2 CONT 175 []
2 CONT 176 []
2 CONT 177 []
2 CONT 178 []
2 CONT 179 []
2 CONT 180 []
2 CONT 181 []
2 CONT 182 []
2 CONT 183 []
2 CONT 184 []
2 CONT 185 []
2 CONT 186 []
2 CONT 187 []
2 CONT 188 []
2 CONT 189 []
2 CONT 190 []
2 CONT 191 []
2 CONT 192 []
2 CONT 193 []
2 CONT 194 []
2 CONT 195 []
2 CONT 196 []
2 CONT 197 []
2 CONT 198 []
2 CONT 199 []
2 CONT 200 []
2 CONT 201 []
2 CONT 202 []
2 CONT 203 []
2 CONT 204 []
2 CONT 205 []
2 CONT 206 []
2 CONT 207 []
2 CONT 208 []
2 CONT 209 []
2 CONT 210 []
2 CONT 211 []
2 CONT 212 []
2 CONT 213 []
2 CONT 214 []
2 CONT 215 []
2 CONT 216 []
2 CONT 217 []
2 CONT 218 []
2 CONT 219 []
2 CONT 220 []
2 CONT 221 []
2 CONT 222 []
2 CONT 223 []
2 CONT 224 []
2 CONT 225 []
2 CONT 226 []
2 CONT 227 []
2 CONT 228 []
2 CONT 229 []
2 CONT 230 []
2 CONT 231 []
2 CONT 232 []
2 CONT 233 []
2 CONT 234 []
2 CONT 235 []
2 CONT 236 []
2 CONT 237 []
2 CONT 238 []
2 CONT 239 []
2 CONT 240 []
2 CONT 241 []
2 CONT 242 []
2 CONT 243 []
2 CONT 244 []
2 CONT 245 []
2 CONT 246 []
2 CONT 247 []
2 CONT 248 []
2 CONT 249 []
2 CONT 250 []
2 CONT 251 []
2 CONT 252 []
2 CONT 253 []
2 CONT 254 []
2 CONT 255 []
0 TRLR
\ No newline at end of file
diff --git a/data/tests/cp1252_CR.gramps b/data/tests/cp1252_CR.gramps
new file mode 100644
index 000000000..1bbeee04c
--- /dev/null
+++ b/data/tests/cp1252_CR.gramps
@@ -0,0 +1,260 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ F
+
+ Jean
+ Thomas
+
+
+
+
+
+
+
+
+ Table de caractères Windows cp1252
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+128 [€]
+130 [‚]
+131 [ƒ]
+132 [„]
+133 […]
+134 [†]
+135 [‡]
+136 [ˆ]
+137 [‰]
+138 [Š]
+139 [‹]
+140 [Œ]
+145 [‘]
+146 [’]
+147 [“]
+148 [”]
+149 [•]
+150 [–]
+151 [—]
+152 [˜]
+153 [™]
+154 [š]
+155 [›]
+156 [œ]
+159 [Ÿ]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/data/tests/cp1252_CRLF.ged b/data/tests/cp1252_CRLF.ged
new file mode 100644
index 000000000..8c21f45d4
--- /dev/null
+++ b/data/tests/cp1252_CRLF.ged
@@ -0,0 +1,236 @@
+0 HEAD
+1 SOUR LIFELINES 3.0.62
+1 DEST ANY
+1 DATE 3 DEC 2010
+2 TIME 7:03
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 LANG French
+1 CHAR cp1252
+0 @I1@ INDI
+1 NAME Jean /Thomas/
+1 SEX F
+1 BIRT
+2 DATE 1830
+1 DEAT
+2 DATE 1904
+1 NOTE
+2 CONT Table de caractres Windows cp1252
+2 CONT 32 [ ]
+2 CONT 33 [!]
+2 CONT 34 ["]
+2 CONT 35 [#]
+2 CONT 36 [$]
+2 CONT 37 [%]
+2 CONT 38 [&]
+2 CONT 39 [']
+2 CONT 40 [(]
+2 CONT 41 [)]
+2 CONT 42 [*]
+2 CONT 43 [+]
+2 CONT 44 [,]
+2 CONT 45 [-]
+2 CONT 46 [.]
+2 CONT 47 [/]
+2 CONT 48 [0]
+2 CONT 49 [1]
+2 CONT 50 [2]
+2 CONT 51 [3]
+2 CONT 52 [4]
+2 CONT 53 [5]
+2 CONT 54 [6]
+2 CONT 55 [7]
+2 CONT 56 [8]
+2 CONT 57 [9]
+2 CONT 58 [:]
+2 CONT 59 [;]
+2 CONT 60 [<]
+2 CONT 61 [=]
+2 CONT 62 [>]
+2 CONT 63 [?]
+2 CONT 64 [@]
+2 CONT 65 [A]
+2 CONT 66 [B]
+2 CONT 67 [C]
+2 CONT 68 [D]
+2 CONT 69 [E]
+2 CONT 70 [F]
+2 CONT 71 [G]
+2 CONT 72 [H]
+2 CONT 73 [I]
+2 CONT 74 [J]
+2 CONT 75 [K]
+2 CONT 76 [L]
+2 CONT 77 [M]
+2 CONT 78 [N]
+2 CONT 79 [O]
+2 CONT 80 [P]
+2 CONT 81 [Q]
+2 CONT 82 [R]
+2 CONT 83 [S]
+2 CONT 84 [T]
+2 CONT 85 [U]
+2 CONT 86 [V]
+2 CONT 87 [W]
+2 CONT 88 [X]
+2 CONT 89 [Y]
+2 CONT 90 [Z]
+2 CONT 91 [[]
+2 CONT 92 [\]
+2 CONT 93 []]
+2 CONT 94 [^]
+2 CONT 95 [_]
+2 CONT 96 [`]
+2 CONT 97 [a]
+2 CONT 98 [b]
+2 CONT 99 [c]
+2 CONT 100 [d]
+2 CONT 101 [e]
+2 CONT 102 [f]
+2 CONT 103 [g]
+2 CONT 104 [h]
+2 CONT 105 [i]
+2 CONT 106 [j]
+2 CONT 107 [k]
+2 CONT 108 [l]
+2 CONT 109 [m]
+2 CONT 110 [n]
+2 CONT 111 [o]
+2 CONT 112 [p]
+2 CONT 113 [q]
+2 CONT 114 [r]
+2 CONT 115 [s]
+2 CONT 116 [t]
+2 CONT 117 [u]
+2 CONT 118 [v]
+2 CONT 119 [w]
+2 CONT 120 [x]
+2 CONT 121 [y]
+2 CONT 122 [z]
+2 CONT 123 [{]
+2 CONT 124 [|]
+2 CONT 125 [}]
+2 CONT 126 [~]
+2 CONT 128 []
+2 CONT 130 []
+2 CONT 131 []
+2 CONT 132 []
+2 CONT 133 []
+2 CONT 134 []
+2 CONT 135 []
+2 CONT 136 []
+2 CONT 137 []
+2 CONT 138 []
+2 CONT 139 []
+2 CONT 140 []
+2 CONT 145 []
+2 CONT 146 []
+2 CONT 147 []
+2 CONT 148 []
+2 CONT 149 []
+2 CONT 150 []
+2 CONT 151 []
+2 CONT 152 []
+2 CONT 153 []
+2 CONT 154 []
+2 CONT 155 []
+2 CONT 156 []
+2 CONT 159 []
+2 CONT 160 []
+2 CONT 161 []
+2 CONT 162 []
+2 CONT 163 []
+2 CONT 164 []
+2 CONT 165 []
+2 CONT 166 []
+2 CONT 167 []
+2 CONT 168 []
+2 CONT 169 []
+2 CONT 170 []
+2 CONT 171 []
+2 CONT 172 []
+2 CONT 173 []
+2 CONT 174 []
+2 CONT 175 []
+2 CONT 176 []
+2 CONT 177 []
+2 CONT 178 []
+2 CONT 179 []
+2 CONT 180 []
+2 CONT 181 []
+2 CONT 182 []
+2 CONT 183 []
+2 CONT 184 []
+2 CONT 185 []
+2 CONT 186 []
+2 CONT 187 []
+2 CONT 188 []
+2 CONT 189 []
+2 CONT 190 []
+2 CONT 191 []
+2 CONT 192 []
+2 CONT 193 []
+2 CONT 194 []
+2 CONT 195 []
+2 CONT 196 []
+2 CONT 197 []
+2 CONT 198 []
+2 CONT 199 []
+2 CONT 200 []
+2 CONT 201 []
+2 CONT 202 []
+2 CONT 203 []
+2 CONT 204 []
+2 CONT 205 []
+2 CONT 206 []
+2 CONT 207 []
+2 CONT 208 []
+2 CONT 209 []
+2 CONT 210 []
+2 CONT 211 []
+2 CONT 212 []
+2 CONT 213 []
+2 CONT 214 []
+2 CONT 215 []
+2 CONT 216 []
+2 CONT 217 []
+2 CONT 218 []
+2 CONT 219 []
+2 CONT 220 []
+2 CONT 221 []
+2 CONT 222 []
+2 CONT 223 []
+2 CONT 224 []
+2 CONT 225 []
+2 CONT 226 []
+2 CONT 227 []
+2 CONT 228 []
+2 CONT 229 []
+2 CONT 230 []
+2 CONT 231 []
+2 CONT 232 []
+2 CONT 233 []
+2 CONT 234 []
+2 CONT 235 []
+2 CONT 236 []
+2 CONT 237 []
+2 CONT 238 []
+2 CONT 239 []
+2 CONT 240 []
+2 CONT 241 []
+2 CONT 242 []
+2 CONT 243 []
+2 CONT 244 []
+2 CONT 245 []
+2 CONT 246 []
+2 CONT 247 []
+2 CONT 248 []
+2 CONT 249 []
+2 CONT 250 []
+2 CONT 251 []
+2 CONT 252 []
+2 CONT 253 []
+2 CONT 254 []
+2 CONT 255 []
+0 TRLR
diff --git a/data/tests/cp1252_CRLF.gramps b/data/tests/cp1252_CRLF.gramps
new file mode 100644
index 000000000..1bbeee04c
--- /dev/null
+++ b/data/tests/cp1252_CRLF.gramps
@@ -0,0 +1,260 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ F
+
+ Jean
+ Thomas
+
+
+
+
+
+
+
+
+ Table de caractères Windows cp1252
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+128 [€]
+130 [‚]
+131 [ƒ]
+132 [„]
+133 […]
+134 [†]
+135 [‡]
+136 [ˆ]
+137 [‰]
+138 [Š]
+139 [‹]
+140 [Œ]
+145 [‘]
+146 [’]
+147 [“]
+148 [”]
+149 [•]
+150 [–]
+151 [—]
+152 [˜]
+153 [™]
+154 [š]
+155 [›]
+156 [œ]
+159 [Ÿ]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/data/tests/cp1252_LF.ged b/data/tests/cp1252_LF.ged
new file mode 100644
index 000000000..037a286c4
--- /dev/null
+++ b/data/tests/cp1252_LF.ged
@@ -0,0 +1,236 @@
+0 HEAD
+1 SOUR LIFELINES 3.0.62
+1 DEST ANY
+1 DATE 3 DEC 2010
+2 TIME 7:03
+1 GEDC
+2 VERS 5.5
+2 FORM LINEAGE-LINKED
+1 LANG French
+1 CHAR cp1252
+0 @I1@ INDI
+1 NAME Jean /Thomas/
+1 SEX F
+1 BIRT
+2 DATE 1830
+1 DEAT
+2 DATE 1904
+1 NOTE
+2 CONT Table de caractres Windows cp1252
+2 CONT 32 [ ]
+2 CONT 33 [!]
+2 CONT 34 ["]
+2 CONT 35 [#]
+2 CONT 36 [$]
+2 CONT 37 [%]
+2 CONT 38 [&]
+2 CONT 39 [']
+2 CONT 40 [(]
+2 CONT 41 [)]
+2 CONT 42 [*]
+2 CONT 43 [+]
+2 CONT 44 [,]
+2 CONT 45 [-]
+2 CONT 46 [.]
+2 CONT 47 [/]
+2 CONT 48 [0]
+2 CONT 49 [1]
+2 CONT 50 [2]
+2 CONT 51 [3]
+2 CONT 52 [4]
+2 CONT 53 [5]
+2 CONT 54 [6]
+2 CONT 55 [7]
+2 CONT 56 [8]
+2 CONT 57 [9]
+2 CONT 58 [:]
+2 CONT 59 [;]
+2 CONT 60 [<]
+2 CONT 61 [=]
+2 CONT 62 [>]
+2 CONT 63 [?]
+2 CONT 64 [@]
+2 CONT 65 [A]
+2 CONT 66 [B]
+2 CONT 67 [C]
+2 CONT 68 [D]
+2 CONT 69 [E]
+2 CONT 70 [F]
+2 CONT 71 [G]
+2 CONT 72 [H]
+2 CONT 73 [I]
+2 CONT 74 [J]
+2 CONT 75 [K]
+2 CONT 76 [L]
+2 CONT 77 [M]
+2 CONT 78 [N]
+2 CONT 79 [O]
+2 CONT 80 [P]
+2 CONT 81 [Q]
+2 CONT 82 [R]
+2 CONT 83 [S]
+2 CONT 84 [T]
+2 CONT 85 [U]
+2 CONT 86 [V]
+2 CONT 87 [W]
+2 CONT 88 [X]
+2 CONT 89 [Y]
+2 CONT 90 [Z]
+2 CONT 91 [[]
+2 CONT 92 [\]
+2 CONT 93 []]
+2 CONT 94 [^]
+2 CONT 95 [_]
+2 CONT 96 [`]
+2 CONT 97 [a]
+2 CONT 98 [b]
+2 CONT 99 [c]
+2 CONT 100 [d]
+2 CONT 101 [e]
+2 CONT 102 [f]
+2 CONT 103 [g]
+2 CONT 104 [h]
+2 CONT 105 [i]
+2 CONT 106 [j]
+2 CONT 107 [k]
+2 CONT 108 [l]
+2 CONT 109 [m]
+2 CONT 110 [n]
+2 CONT 111 [o]
+2 CONT 112 [p]
+2 CONT 113 [q]
+2 CONT 114 [r]
+2 CONT 115 [s]
+2 CONT 116 [t]
+2 CONT 117 [u]
+2 CONT 118 [v]
+2 CONT 119 [w]
+2 CONT 120 [x]
+2 CONT 121 [y]
+2 CONT 122 [z]
+2 CONT 123 [{]
+2 CONT 124 [|]
+2 CONT 125 [}]
+2 CONT 126 [~]
+2 CONT 128 []
+2 CONT 130 []
+2 CONT 131 []
+2 CONT 132 []
+2 CONT 133 []
+2 CONT 134 []
+2 CONT 135 []
+2 CONT 136 []
+2 CONT 137 []
+2 CONT 138 []
+2 CONT 139 []
+2 CONT 140 []
+2 CONT 145 []
+2 CONT 146 []
+2 CONT 147 []
+2 CONT 148 []
+2 CONT 149 []
+2 CONT 150 []
+2 CONT 151 []
+2 CONT 152 []
+2 CONT 153 []
+2 CONT 154 []
+2 CONT 155 []
+2 CONT 156 []
+2 CONT 159 []
+2 CONT 160 []
+2 CONT 161 []
+2 CONT 162 []
+2 CONT 163 []
+2 CONT 164 []
+2 CONT 165 []
+2 CONT 166 []
+2 CONT 167 []
+2 CONT 168 []
+2 CONT 169 []
+2 CONT 170 []
+2 CONT 171 []
+2 CONT 172 []
+2 CONT 173 []
+2 CONT 174 []
+2 CONT 175 []
+2 CONT 176 []
+2 CONT 177 []
+2 CONT 178 []
+2 CONT 179 []
+2 CONT 180 []
+2 CONT 181 []
+2 CONT 182 []
+2 CONT 183 []
+2 CONT 184 []
+2 CONT 185 []
+2 CONT 186 []
+2 CONT 187 []
+2 CONT 188 []
+2 CONT 189 []
+2 CONT 190 []
+2 CONT 191 []
+2 CONT 192 []
+2 CONT 193 []
+2 CONT 194 []
+2 CONT 195 []
+2 CONT 196 []
+2 CONT 197 []
+2 CONT 198 []
+2 CONT 199 []
+2 CONT 200 []
+2 CONT 201 []
+2 CONT 202 []
+2 CONT 203 []
+2 CONT 204 []
+2 CONT 205 []
+2 CONT 206 []
+2 CONT 207 []
+2 CONT 208 []
+2 CONT 209 []
+2 CONT 210 []
+2 CONT 211 []
+2 CONT 212 []
+2 CONT 213 []
+2 CONT 214 []
+2 CONT 215 []
+2 CONT 216 []
+2 CONT 217 []
+2 CONT 218 []
+2 CONT 219 []
+2 CONT 220 []
+2 CONT 221 []
+2 CONT 222 []
+2 CONT 223 []
+2 CONT 224 []
+2 CONT 225 []
+2 CONT 226 []
+2 CONT 227 []
+2 CONT 228 []
+2 CONT 229 []
+2 CONT 230 []
+2 CONT 231 []
+2 CONT 232 []
+2 CONT 233 []
+2 CONT 234 []
+2 CONT 235 []
+2 CONT 236 []
+2 CONT 237 []
+2 CONT 238 []
+2 CONT 239 []
+2 CONT 240 []
+2 CONT 241 []
+2 CONT 242 []
+2 CONT 243 []
+2 CONT 244 []
+2 CONT 245 []
+2 CONT 246 []
+2 CONT 247 []
+2 CONT 248 []
+2 CONT 249 []
+2 CONT 250 []
+2 CONT 251 []
+2 CONT 252 []
+2 CONT 253 []
+2 CONT 254 []
+2 CONT 255 []
+0 TRLR
diff --git a/data/tests/cp1252_LF.gramps b/data/tests/cp1252_LF.gramps
new file mode 100644
index 000000000..1bbeee04c
--- /dev/null
+++ b/data/tests/cp1252_LF.gramps
@@ -0,0 +1,260 @@
+
+
+
+
+
+
+ Paul Culley
+ 11210 Olde Mint House Ln
+ Tomball
+ Tx
+ USA
+ 77375
+ paulr2787@gmail.com
+
+
+
+
+ Birth
+
+
+
+ Death
+
+
+
+
+
+ F
+
+ Jean
+ Thomas
+
+
+
+
+
+
+
+
+ Table de caractères Windows cp1252
+32 [ ]
+33 [!]
+34 ["]
+35 [#]
+36 [$]
+37 [%]
+38 [&]
+39 [']
+40 [(]
+41 [)]
+42 [*]
+43 [+]
+44 [,]
+45 [-]
+46 [.]
+47 [/]
+48 [0]
+49 [1]
+50 [2]
+51 [3]
+52 [4]
+53 [5]
+54 [6]
+55 [7]
+56 [8]
+57 [9]
+58 [:]
+59 [;]
+60 [<]
+61 [=]
+62 [>]
+63 [?]
+64 [@]
+65 [A]
+66 [B]
+67 [C]
+68 [D]
+69 [E]
+70 [F]
+71 [G]
+72 [H]
+73 [I]
+74 [J]
+75 [K]
+76 [L]
+77 [M]
+78 [N]
+79 [O]
+80 [P]
+81 [Q]
+82 [R]
+83 [S]
+84 [T]
+85 [U]
+86 [V]
+87 [W]
+88 [X]
+89 [Y]
+90 [Z]
+91 [[]
+92 [\]
+93 []]
+94 [^]
+95 [_]
+96 [`]
+97 [a]
+98 [b]
+99 [c]
+100 [d]
+101 [e]
+102 [f]
+103 [g]
+104 [h]
+105 [i]
+106 [j]
+107 [k]
+108 [l]
+109 [m]
+110 [n]
+111 [o]
+112 [p]
+113 [q]
+114 [r]
+115 [s]
+116 [t]
+117 [u]
+118 [v]
+119 [w]
+120 [x]
+121 [y]
+122 [z]
+123 [{]
+124 [|]
+125 [}]
+126 [~]
+128 [€]
+130 [‚]
+131 [ƒ]
+132 [„]
+133 […]
+134 [†]
+135 [‡]
+136 [ˆ]
+137 [‰]
+138 [Š]
+139 [‹]
+140 [Œ]
+145 [‘]
+146 [’]
+147 [“]
+148 [”]
+149 [•]
+150 [–]
+151 [—]
+152 [˜]
+153 [™]
+154 [š]
+155 [›]
+156 [œ]
+159 [Ÿ]
+160 [ ]
+161 [¡]
+162 [¢]
+163 [£]
+164 [¤]
+165 [¥]
+166 [¦]
+167 [§]
+168 [¨]
+169 [©]
+170 [ª]
+171 [«]
+172 [¬]
+173 []
+174 [®]
+175 [¯]
+176 [°]
+177 [±]
+178 [²]
+179 [³]
+180 [´]
+181 [µ]
+182 [¶]
+183 [·]
+184 [¸]
+185 [¹]
+186 [º]
+187 [»]
+188 [¼]
+189 [½]
+190 [¾]
+191 [¿]
+192 [À]
+193 [Á]
+194 [Â]
+195 [Ã]
+196 [Ä]
+197 [Å]
+198 [Æ]
+199 [Ç]
+200 [È]
+201 [É]
+202 [Ê]
+203 [Ë]
+204 [Ì]
+205 [Í]
+206 [Î]
+207 [Ï]
+208 [Ð]
+209 [Ñ]
+210 [Ò]
+211 [Ó]
+212 [Ô]
+213 [Õ]
+214 [Ö]
+215 [×]
+216 [Ø]
+217 [Ù]
+218 [Ú]
+219 [Û]
+220 [Ü]
+221 [Ý]
+222 [Þ]
+223 [ß]
+224 [à]
+225 [á]
+226 [â]
+227 [ã]
+228 [ä]
+229 [å]
+230 [æ]
+231 [ç]
+232 [è]
+233 [é]
+234 [ê]
+235 [ë]
+236 [ì]
+237 [í]
+238 [î]
+239 [ï]
+240 [ð]
+241 [ñ]
+242 [ò]
+243 [ó]
+244 [ô]
+245 [õ]
+246 [ö]
+247 [÷]
+248 [ø]
+249 [ù]
+250 [ú]
+251 [û]
+252 [ü]
+253 [ý]
+254 [þ]
+255 [ÿ]
+
+
+
diff --git a/gramps/plugins/importer/importgedcom.py b/gramps/plugins/importer/importgedcom.py
index 482589b23..3f972ee99 100644
--- a/gramps/plugins/importer/importgedcom.py
+++ b/gramps/plugins/importer/importgedcom.py
@@ -65,26 +65,31 @@ def importData(database, filename, user):
if DbMixin not in database.__class__.__bases__:
database.__class__.__bases__ = (DbMixin,) + \
database.__class__.__bases__
-
try:
- with open(filename, "rb") as ifile:
+ # Opening in utf-8 with universal newline to allow cr, lf, and crlf
+ # If the file is really UTF16 or a variant, the next block of code will not
+ # find anything even if it is there, but this is ok since it won't be
+ # ANSEL, or is inconsistent...
+ with open(filename, "r", encoding='utf-8', errors='replace',
+ newline=None) as ifile:
ansel = False
gramps = False
for index in range(50):
- # Treat the file as though it is UTF-8 since this is the more modern
- # option; and anyway it doesn't really matter as we are only trying to
- # detect a CHAR or SOUR line which is only 7-bit ASCII anyway, and we
- # ignore anything that can't be translated.
+ # Treat the file as though it is UTF-8 since this is the more
+ # modern option; and anyway it doesn't really matter as we are
+ # only trying to detect a CHAR or SOUR line which is only
+ # 7-bit ASCII anyway, and we ignore anything that can't be
+ # translated.
line = ifile.readline()
- line = line.decode(encoding='utf-8', errors='replace')
line = line.split()
if len(line) == 0:
break
- if len(line) > 2 and line[1][0:4] == 'CHAR' and line[2] == "ANSEL":
+ if len(line) > 2 and line[1][0:4] == 'CHAR' \
+ and line[2] == "ANSEL":
ansel = True
- if len(line) > 2 and line[1][0:4] == 'SOUR' and line[2] == "GRAMPS":
+ if len(line) > 2 and line[1][0:4] == 'SOUR' \
+ and line[2] == "GRAMPS":
gramps = True
-
except IOError:
return
diff --git a/gramps/plugins/lib/libgedcom.py b/gramps/plugins/lib/libgedcom.py
index b19fef16e..34368d970 100755
--- a/gramps/plugins/lib/libgedcom.py
+++ b/gramps/plugins/lib/libgedcom.py
@@ -94,7 +94,7 @@ import codecs
from xml.parsers.expat import ParserCreate
from collections import defaultdict, OrderedDict
import string
-from io import StringIO
+from io import StringIO, TextIOWrapper
from urllib.parse import urlparse
#------------------------------------------------------------------------
@@ -1248,41 +1248,41 @@ class BaseReader:
class UTF8Reader(BaseReader):
- def __init__(self, ifile, __add_msg):
- BaseReader.__init__(self, ifile, 'utf8', __add_msg)
+ def __init__(self, ifile, __add_msg, enc):
+ BaseReader.__init__(self, ifile, enc, __add_msg)
self.reset()
-
- def reset(self):
- self.ifile.seek(0)
- data = self.ifile.read(3)
- if data != b"\xef\xbb\xbf":
- self.ifile.seek(0)
+ if enc == 'UTF_8_SIG':
+ self.ifile = TextIOWrapper(ifile, encoding='utf_8_sig',
+ errors='replace', newline=None)
+ else:
+ self.ifile = TextIOWrapper(ifile, encoding='utf_8',
+ errors='replace', newline=None)
def readline(self):
line = self.ifile.readline()
- line = line.decode(self.enc, errors='replace')
return line.translate(STRIP_DICT)
class UTF16Reader(BaseReader):
def __init__(self, ifile, __add_msg):
- new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16')
- BaseReader.__init__(self, new_file, '', __add_msg)
+ BaseReader.__init__(self, ifile, 'UTF16', __add_msg)
+ self.ifile = TextIOWrapper(ifile, encoding='utf_16',
+ errors='replace', newline=None)
self.reset()
def readline(self):
line = self.ifile.readline()
- line = line.decode('utf8', errors='replace')
return line.translate(STRIP_DICT)
class AnsiReader(BaseReader):
def __init__(self, ifile, __add_msg):
BaseReader.__init__(self, ifile, 'latin1', __add_msg)
+ self.ifile = TextIOWrapper(ifile, encoding='latin1',
+ errors='replace', newline=None)
def readline(self):
line = self.ifile.readline()
- line = line.decode(self.enc, errors='replace')
if line.translate(DEL_AND_C1) != line:
self.report_error("DEL or C1 control chars in line did you mean CHAR cp1252??", line)
return line.translate(STRIP_DICT)
@@ -1291,10 +1291,11 @@ class CP1252Reader(BaseReader):
def __init__(self, ifile, __add_msg):
BaseReader.__init__(self, ifile, 'cp1252', __add_msg)
+ self.ifile = TextIOWrapper(ifile, encoding='cp1252',
+ errors='replace', newline=None)
def readline(self):
line = self.ifile.readline()
- line = line.decode(self.enc, errors='replace')
return line.translate(STRIP_DICT)
class AnselReader(BaseReader):
@@ -1562,10 +1563,17 @@ class AnselReader(BaseReader):
return ans
def __init__(self, ifile, __add_msg):
- BaseReader.__init__(self, ifile, "", __add_msg)
+ BaseReader.__init__(self, ifile, "ANSEL", __add_msg)
+ # In theory, we should have been able to skip the encode/decode from
+ # ascii. But this way allows us to use Python's universal newline
+ self.ifile = TextIOWrapper(ifile, encoding='ascii',
+ errors='surrogateescape', newline=None)
def readline(self):
- return self.__ansel_to_unicode(self.ifile.readline())
+ line = self.ifile.readline()
+ linebytes = line.encode(encoding='ascii',
+ errors='surrogateescape')
+ return self.__ansel_to_unicode(linebytes)
#-------------------------------------------------------------------------
#
@@ -2673,8 +2681,8 @@ class GedcomParser(UpdateCallback):
if enc == "ANSEL":
rdr = AnselReader(ifile, self.__add_msg)
- elif enc in ("UTF-8", "UTF8"):
- rdr = UTF8Reader(ifile, self.__add_msg)
+ elif enc in ("UTF-8", "UTF8", "UTF_8_SIG"):
+ rdr = UTF8Reader(ifile, self.__add_msg, enc)
elif enc in ("UTF-16LE", "UTF-16BE", "UTF16", "UNICODE"):
rdr = UTF16Reader(ifile, self.__add_msg)
elif enc in ("CP1252", "WINDOWS-1252"):
@@ -7772,26 +7780,33 @@ class GedcomStageOne:
def __detect_file_decoder(self, input_file):
"""
Detects the file encoding of the file by looking for a BOM
- (byte order marker) in the GEDCOM file. If we detect a UTF-16
- encoded file, we must connect to a wrapper using the codecs
- package.
+ (byte order marker) in the GEDCOM file. If we detect a UTF-16 or
+ UTF-8-BOM encoded file, we choose appropriate decoders. If no BOM
+ is detected, we return in UTF-8 mode, as it is the more modern option;
+ and anyway it doesn't really matter as we are only looking for GEDCOM
+ keywords which are only 7-bit ASCII anyway.
+ In any case, we always return the file in text mode with transparent
+ newline (CR, LF, or CRLF).
"""
line = input_file.read(2)
if line == b"\xef\xbb":
input_file.read(1)
- self.enc = "UTF8"
- return input_file
+ self.enc = "utf_8_sig"
+ return TextIOWrapper(input_file, encoding='utf_8_sig',
+ errors='replace', newline=None)
elif line == b"\xff\xfe" or line == b"\xfe\xff":
self.enc = "UTF16"
input_file.seek(0)
- return codecs.EncodedFile(input_file, 'utf8', 'utf16')
- elif not line :
+ return TextIOWrapper(input_file, encoding='utf_16',
+ errors='replace', newline=None)
+ elif not line:
raise GedcomError(self.__EMPTY_GED)
- elif line[0] == b"\x00" or line[1] == b"\x00":
+ elif line == b"\x30\x00" or line == b"\x00\x30":
raise GedcomError(self.__BAD_UTF16)
else:
input_file.seek(0)
- return input_file
+ return TextIOWrapper(input_file, encoding='utf-8',
+ errors='replace', newline=None)
def parse(self):
"""
@@ -7802,12 +7817,8 @@ class GedcomStageOne:
reader = self.__detect_file_decoder(self.ifile)
for line in reader:
- # Treat the file as though it is UTF-8 since this will be right if a
- # BOM was detected; it is the more modern option; and anyway it
- # doesn't really matter as we are only trying to detect a CHAR line
- # which is only 7-bit ASCII anyway, and we ignore anything that
- # can't be translated.
- line = line.decode(encoding='utf-8', errors='replace')
+ # Scan for a few items, keep counts. Also look for actual CHAR
+ # keyword to figure out the actual encoding for non-Unicode file types
line = line.strip()
if not line:
continue
@@ -7838,6 +7849,7 @@ class GedcomStageOne:
LOG.debug("parse pcnt %d" % self.pcnt)
LOG.debug("parse famc %s" % dict(self.famc))
LOG.debug("parse fams %s" % dict(self.fams))
+ self.ifile = reader # need this to keep python from autoclosing file
def get_famc_map(self):
"""