diff --git a/.gitattributes b/.gitattributes index c622f57b8..65dbf4920 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,3 +7,7 @@ # Have Github ignore js vendored files. # https://github.com/gramps-project/gramps/tree/master/data/javascript #data/javascript/*.js linguist-vendored + +# don't mess with line endings for Gedcom files +*.ged binary +*.GED binary diff --git a/data/tests/ANSEL_CR.GED b/data/tests/ANSEL_CR.GED new file mode 100644 index 000000000..ea588a26e --- /dev/null +++ b/data/tests/ANSEL_CR.GED @@ -0,0 +1 @@ +0 HEAD 1 CHAR ANSEL 1 SOUR REGISTERED_SOURCE_NAME 1 GEDC 2 VERS 5.5 2 FORM LINEAGE-LINKED 1 NOTE This GEDCOM transmission contains a charcter set test. It consists 2 CONT of a single family (two parents, many children). The parents are empty 2 CONT in the ANSEL version of the transmission. The children contain the 2 CONT combined letters and the special charcters (value > 128). 2 CONT The NAME tag of each 'person' is the name of the characters tested 2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the 2 CONT test-strings. 2 CONT The first children contain special characters. Here the test string 2 CONT is 'character name (test character), ...' where 'character name' 2 CONT is the name of the character (like 'british pound') and 2 CONT 'test character' is a single byte representing this character 2 CONT in ANSEL. 2 CONT The last children contain combined characters. The name tag gives 2 CONT the name of the non-spacing character tested within the 'person'. 2 CONT Within the name the hex-values of the non-spacing character is given 2 CONT in ANSEL and UNICODE. The test strings contain the whole latin 2 CONT alphabet combined with this non-spacing character: captial letters 2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag. 2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC 2 CONT tag contains all 26 capital letters with a small ring on top. 
2 CONT Note: Not all charcters can be displayed on all computers. 2 CONT This strongly depends on the installed fonts and codepages. 2 CONT Many of the combined characters generated here do not even have 2 CONT a UNICDOE code point! 2 CONT This file based mainly on the GEDCOM 5.5 specification 2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) 2 CONT and on an updated ANSEL description in: 2 CONT http://www.gendex.com/gedcom55/55gcappd.htm 1 SUBM @SUBMITTER@ 1 DATE 20 JAN 1998 0 @SUBMITTER@ SUBM 1 NAME /H. Eichmann/ 1 ADDR email: h.eichmann@@gmx.de 0 @FATHER@ INDI 1 NAME /cyrillic (not possible in ANSEL)/ 1 SEX M 1 FAMS @FAMILY@ 0 @MOTHER@ INDI 1 NAME /greek (not possible in ANSEL)/ 1 SEX F 1 FAMS @FAMILY@ 0 @CHILD0@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 0/ 1 BIRT 2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase () 1 DEAT 2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat () 0 @CHILD1@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 1/ 1 BIRT 2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase () 1 DEAT 2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase () 0 @CHILD2@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 2/ 1 BIRT 2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak () 1 DEAT 2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase () 0 @CHILD3@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 3/ 1 BIRT 2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol () 1 DEAT 2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet () 0 @CHILD4@ INDI 1 FAMC @FAMILY@ 1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD5@ 
INDI 1 FAMC @FAMILY@ 1 NAME code: E1 (Unicode: grave, 0300)/grave accent/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD6@ INDI 1 FAMC @FAMILY@ 1 NAME code: E2 (Unicode: acute, 0301)/acute accent/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD7@ INDI 1 FAMC @FAMILY@ 1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD8@ INDI 1 FAMC @FAMILY@ 1 NAME code: E4 (Unicode: tilde, 0303)/tilde/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD9@ INDI 1 FAMC @FAMILY@ 1 NAME code: E5 (Unicode: macron, 0304)/macron/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD10@ INDI 1 FAMC @FAMILY@ 1 NAME code: E6 (Unicode: breve, 0306)/breve/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD11@ INDI 1 FAMC @FAMILY@ 1 NAME code: E7 (Unicode: dot above, 0307)/dot above/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD12@ INDI 1 FAMC @FAMILY@ 1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD13@ INDI 1 FAMC @FAMILY@ 1 NAME code: E9 (Unicode: caron, 030C)/hacek/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD14@ INDI 1 FAMC @FAMILY@ 1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD15@ INDI 1 FAMC @FAMILY@ 1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD16@ INDI 1 FAMC @FAMILY@ 1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/ 1 BIRT 2 PLAC 
ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD17@ INDI 1 FAMC @FAMILY@ 1 NAME code: ED (Unicode: comma above right, 0315)/high comma, off center/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD18@ INDI 1 FAMC @FAMILY@ 1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD19@ INDI 1 FAMC @FAMILY@ 1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD20@ INDI 1 FAMC @FAMILY@ 1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD21@ INDI 1 FAMC @FAMILY@ 1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD22@ INDI 1 FAMC @FAMILY@ 1 NAME code: F2 (Unicode: dot below, 0323)/dot below/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD23@ INDI 1 FAMC @FAMILY@ 1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD24@ INDI 1 FAMC @FAMILY@ 1 NAME code: F4 (Unicode: ring below, 0325)/circle below/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD25@ INDI 1 FAMC @FAMILY@ 1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD26@ INDI 1 FAMC @FAMILY@ 1 NAME code: F6 (Unicode: line below, 0332)/underscore/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD27@ INDI 1 FAMC @FAMILY@ 1 NAME code: F7 (Unicode: comma below, 0326)/left hook/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 
@CHILD28@ INDI 1 FAMC @FAMILY@ 1 NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD29@ INDI 1 FAMC @FAMILY@ 1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD30@ INDI 1 FAMC @FAMILY@ 1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD31@ INDI 1 FAMC @FAMILY@ 1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @CHILD32@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/ 1 BIRT 2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ 1 DEAT 2 PLAC abcdefghijklmnopqrstuvwxyz 0 @FAMILY@ FAM 1 HUSB @FATHER@ 1 WIFE @MOTHER@ 1 CHIL @CHILD0@ 1 CHIL @CHILD1@ 1 CHIL @CHILD2@ 1 CHIL @CHILD3@ 1 CHIL @CHILD4@ 1 CHIL @CHILD5@ 1 CHIL @CHILD6@ 1 CHIL @CHILD7@ 1 CHIL @CHILD8@ 1 CHIL @CHILD9@ 1 CHIL @CHILD10@ 1 CHIL @CHILD11@ 1 CHIL @CHILD12@ 1 CHIL @CHILD13@ 1 CHIL @CHILD14@ 1 CHIL @CHILD15@ 1 CHIL @CHILD16@ 1 CHIL @CHILD17@ 1 CHIL @CHILD18@ 1 CHIL @CHILD19@ 1 CHIL @CHILD20@ 1 CHIL @CHILD21@ 1 CHIL @CHILD22@ 1 CHIL @CHILD23@ 1 CHIL @CHILD24@ 1 CHIL @CHILD25@ 1 CHIL @CHILD26@ 1 CHIL @CHILD27@ 1 CHIL @CHILD28@ 1 CHIL @CHILD29@ 1 CHIL @CHILD30@ 1 CHIL @CHILD31@ 1 CHIL @CHILD32@ 0 TRLR \ No newline at end of file diff --git a/data/tests/ANSEL_CR.gramps b/data/tests/ANSEL_CR.gramps new file mode 100644 index 000000000..83eb3fbb7 --- /dev/null +++ b/data/tests/ANSEL_CR.gramps @@ -0,0 +1,926 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic (not possible in ANSEL) + + + + + F + + greek (not possible in ANSEL) + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: E0 (Unicode: hook above, 0309) + low rising tone mark + + + + + + + U + + code: E1 (Unicode: grave, 0300) + grave accent + + + + + + + U + + code: E2 (Unicode: acute, 0301) + acute accent + + + + + + + U + + code: E3 (Unicode: circumflex, 0302) + circumflex accent + + + + + + + U + + code: E4 (Unicode: tilde, 0303) + tilde + + + + + + + U + + code: E5 (Unicode: macron, 0304) + macron + + + + + + + U + + code: E6 (Unicode: breve, 0306) + breve + + + + + + + U + + code: E7 (Unicode: dot above, 0307) + dot above + + + + + + + U + + code: E8 (Unicode: diaeresis, 0308) + umlaut (dieresis) + + + + + + + U + + code: E9 (Unicode: caron, 030C) + hacek + + + + + + + U + + code: EA (Unicode: ring above, 030A) + circle above (angstrom) + + + + + + + U + + code: EB (Unicode: ligature left half, 
FE20) + ligature, left half + + + + + + + U + + code: EC (Unicode: ligature right half, FE21) + ligature, right half + + + + + + + U + + code: ED (Unicode: comma above right, 0315) + high comma, off center + + + + + + + U + + code: EE (Unicode: double acute, 030B) + double acute accent + + + + + + + U + + code: EF (Unicode: candrabindu, 0310) + candrabindu + + + + + + + U + + code: F0 (Unicode: cedilla, 0327) + cedilla + + + + + + + U + + code: F1 (Unicode: ogonek, 0328) + right hook + + + + + + + U + + code: F2 (Unicode: dot below, 0323) + dot below + + + + + + + U + + code: F3 (Unicode: diaeresis below, 0324) + double dot below + + + + + + + U + + code: F4 (Unicode: ring below, 0325) + circle below + + + + + + + U + + code: F5 (Unicode: double low line, 0333) + double underscore + + + + + + + U + + code: F6 (Unicode: line below, 0332) + underscore + + + + + + + U + + code: F7 (Unicode: comma below, 0326) + left hook + + + + + + + U + + code: F8 (Unicode: left half ring below, 031C) + right cedilla + + + + + + + U + + code: F9 (Unicode: breve below, 032E) + half circle below + + + + + + + U + + code: FA (Unicode: double tilde left half, FE22) + double tilde, left half + + + + + + + U + + code: FB (Unicode: double tilde right half, FE23) + double tilde, right half + + + + + + + U + + code: FE (Unicode: comma above, 0313) + high comma, centered + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ) + + + + ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭) + + + + patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư) + + + + alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ) + + + + thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ) + + + + dotless i - 
lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư) + + + + degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©) + + + + musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß) + + + + ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉ + + + + ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉ + + + + ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀ + + + + àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀ + + + + ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ + + + + áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź + + + + ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ + + + + âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ + + + + ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃ + + + + ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃ + + + + ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄ + + + + āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄ + + + + ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆ + + + + ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆ + + + + ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ + + + + ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż + + + + ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈ + + + + äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈ + + + + ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž + + + + ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž + + + + ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊ + + + + åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊ + + + + A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠ + + + + a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠ + + + + A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡ + + + + a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡ + + + + A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕ + + + + a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕ + + + + A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋ + + + + a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋ + + + + A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐ + + + + a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐ 
+ + + + A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧ + + + + a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧ + + + + ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨ + + + + ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨ + + + + ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ + + + + ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ + + + + A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤ + + + + a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤ + + + + ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥ + + + + ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥ + + + + A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳ + + + + a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳ + + + + A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲ + + + + a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲ + + + + A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦ + + + + a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦ + + + + A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜ + + + + a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜ + + + + A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮ + + + + a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮ + + + + A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢ + + + + a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢ + + + + A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣ + + + + a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣ + + + + A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓ + + + + a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓ + + + +
diff --git a/data/tests/ANSEL_CRLF.GED b/data/tests/ANSEL_CRLF.GED new file mode 100644 index 000000000..fe0b4b4e5 --- /dev/null +++ b/data/tests/ANSEL_CRLF.GED @@ -0,0 +1,315 @@ +0 HEAD +1 CHAR ANSEL +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are empty +2 CONT in the ANSEL version of the transmission. The children contain the +2 CONT combined letters and the special charcters (value > 128). +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the +2 CONT test-strings. +2 CONT The first children contain special characters. Here the test string +2 CONT is 'character name (test character), ...' where 'character name' +2 CONT is the name of the character (like 'british pound') and +2 CONT 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT in ANSEL and UNICODE. The test strings contain the whole latin +2 CONT alphabet combined with this non-spacing character: captial letters +2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag. +2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC +2 CONT tag contains all 26 capital letters with a small ring on top. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT Many of the combined characters generated here do not even have +2 CONT a UNICDOE code point! 
+2 CONT This file based mainly on the GEDCOM 5.5 specification +2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) +2 CONT and on an updated ANSEL description in: +2 CONT http://www.gendex.com/gedcom55/55gcappd.htm +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic (not possible in ANSEL)/ +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek (not possible in ANSEL)/ +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase () +1 DEAT +2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat () +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase () +1 DEAT +2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase () +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak () +1 DEAT +2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase () +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol () +1 DEAT +2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet () +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: E1 (Unicode: grave, 0300)/grave accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD6@ INDI +1 
FAMC @FAMILY@ +1 NAME code: E2 (Unicode: acute, 0301)/acute accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: E4 (Unicode: tilde, 0303)/tilde/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: E5 (Unicode: macron, 0304)/macron/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: E6 (Unicode: breve, 0306)/breve/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: E7 (Unicode: dot above, 0307)/dot above/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: E9 (Unicode: caron, 030C)/hacek/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME 
code: ED (Unicode: comma above right, 0315)/high comma, off center/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: F2 (Unicode: dot below, 0323)/dot below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: F4 (Unicode: ring below, 0325)/circle below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: F6 (Unicode: line below, 0332)/underscore/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: F7 (Unicode: comma below, 0326)/left hook/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 
NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL @CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/ANSEL_CRLF.gramps b/data/tests/ANSEL_CRLF.gramps new file mode 100644 index 000000000..83eb3fbb7 --- /dev/null +++ b/data/tests/ANSEL_CRLF.gramps @@ -0,0 +1,926 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic (not possible in ANSEL) + + + + + F + + greek (not possible in ANSEL) + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: E0 (Unicode: hook above, 0309) + low rising tone mark + + + + + + + U + + code: E1 (Unicode: grave, 0300) + grave accent + + + + + + + U + + code: E2 (Unicode: acute, 0301) + acute accent + + + + + + + U + + code: E3 (Unicode: circumflex, 0302) + circumflex accent + + + + + + + U + + code: E4 (Unicode: tilde, 0303) + tilde + + + + + + + U + + code: E5 (Unicode: macron, 0304) + macron + + + + + + + U + + code: E6 (Unicode: breve, 0306) + breve + + + + + + + U + + code: E7 (Unicode: dot above, 0307) + dot above + + + + + + + U + + code: E8 (Unicode: diaeresis, 0308) + umlaut (dieresis) + + + + + + + U + + code: E9 (Unicode: caron, 030C) + hacek + + + + + + + U + + code: EA (Unicode: ring above, 030A) + circle above (angstrom) + + + + + + + U + + code: EB (Unicode: ligature left half, 
FE20) + ligature, left half + + + + + + + U + + code: EC (Unicode: ligature right half, FE21) + ligature, right half + + + + + + + U + + code: ED (Unicode: comma above right, 0315) + high comma, off center + + + + + + + U + + code: EE (Unicode: double acute, 030B) + double acute accent + + + + + + + U + + code: EF (Unicode: candrabindu, 0310) + candrabindu + + + + + + + U + + code: F0 (Unicode: cedilla, 0327) + cedilla + + + + + + + U + + code: F1 (Unicode: ogonek, 0328) + right hook + + + + + + + U + + code: F2 (Unicode: dot below, 0323) + dot below + + + + + + + U + + code: F3 (Unicode: diaeresis below, 0324) + double dot below + + + + + + + U + + code: F4 (Unicode: ring below, 0325) + circle below + + + + + + + U + + code: F5 (Unicode: double low line, 0333) + double underscore + + + + + + + U + + code: F6 (Unicode: line below, 0332) + underscore + + + + + + + U + + code: F7 (Unicode: comma below, 0326) + left hook + + + + + + + U + + code: F8 (Unicode: left half ring below, 031C) + right cedilla + + + + + + + U + + code: F9 (Unicode: breve below, 032E) + half circle below + + + + + + + U + + code: FA (Unicode: double tilde left half, FE22) + double tilde, left half + + + + + + + U + + code: FB (Unicode: double tilde right half, FE23) + double tilde, right half + + + + + + + U + + code: FE (Unicode: comma above, 0313) + high comma, centered + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ) + + + + ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭) + + + + patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư) + + + + alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ) + + + + thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ) + + + + dotless i - 
lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư) + + + + degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©) + + + + musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß) + + + + ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉ + + + + ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉ + + + + ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀ + + + + àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀ + + + + ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ + + + + áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź + + + + ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ + + + + âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ + + + + ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃ + + + + ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃ + + + + ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄ + + + + āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄ + + + + ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆ + + + + ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆ + + + + ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ + + + + ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż + + + + ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈ + + + + äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈ + + + + ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž + + + + ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž + + + + ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊ + + + + åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊ + + + + A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠ + + + + a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠ + + + + A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡ + + + + a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡ + + + + A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕ + + + + a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕ + + + + A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋ + + + + a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋ + + + + A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐ + + + + a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐ 
+ + + + A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧ + + + + a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧ + + + + ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨ + + + + ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨ + + + + ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ + + + + ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ + + + + A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤ + + + + a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤ + + + + ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥ + + + + ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥ + + + + A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳ + + + + a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳ + + + + A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲ + + + + a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲ + + + + A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦ + + + + a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦ + + + + A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜ + + + + a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜ + + + + A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮ + + + + a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮ + + + + A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢ + + + + a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢ + + + + A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣ + + + + a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣ + + + + A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓ + + + + a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓ + + + +
diff --git a/data/tests/ANSEL_LF.GED b/data/tests/ANSEL_LF.GED new file mode 100644 index 000000000..5996df047 --- /dev/null +++ b/data/tests/ANSEL_LF.GED @@ -0,0 +1,315 @@ +0 HEAD +1 CHAR ANSEL +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are empty +2 CONT in the ANSEL version of the transmission. The children contain the +2 CONT combined letters and the special charcters (value > 128). +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. The BIRT.PLAC and DEAT.PLAC tags contain the +2 CONT test-strings. +2 CONT The first children contain special characters. Here the test string +2 CONT is 'character name (test character), ...' where 'character name' +2 CONT is the name of the character (like 'british pound') and +2 CONT 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT in ANSEL and UNICODE. The test strings contain the whole latin +2 CONT alphabet combined with this non-spacing character: captial letters +2 CONT in the BIRT.PLAC tag and small letters in the DEAT.PLAC tag. +2 CONT Example: One 'person' is named 'circle above'. The BIRT.PLAC +2 CONT tag contains all 26 capital letters with a small ring on top. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT Many of the combined characters generated here do not even have +2 CONT a UNICDOE code point! 
+2 CONT This file based mainly on the GEDCOM 5.5 specification +2 CONT (see: ftp.gedcom.org/pub/genealogy/gedcom/gedcom55.zip) +2 CONT and on an updated ANSEL description in: +2 CONT http://www.gendex.com/gedcom55/55gcappd.htm +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic (not possible in ANSEL)/ +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek (not possible in ANSEL)/ +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC slash l - uppercase (), slash o - uppercase (), slash d - uppercase (), thorn - uppercase () +1 DEAT +2 PLAC ligature ae - uppercase (), ligature oe - uppercase (), miagkii znak (), middle dot (), musical flat () +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC patent mark (), plus-or-minus (), hook o - uppercase (), hook u - uppercase () +1 DEAT +2 PLAC alif (), ayn (), slash l - lowercase (), slash o - lowercase (), slash d - lowercase () +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC thorn - lowercase (), ligature ae - lowercase (), ligature oe - lowercase (), tverdyi znak () +1 DEAT +2 PLAC dotless i - lowercase (), british pound (), eth (), hook o - lowercase (), hook u - lowercase () +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (), script l (), phonograph copyright mark (), copyright symbol () +1 DEAT +2 PLAC musical sharp (), inverted question mark (), inverted exclamation mark (), es zet () +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: E0 (Unicode: hook above, 0309)/low rising tone mark/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: E1 (Unicode: grave, 0300)/grave accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD6@ INDI +1 
FAMC @FAMILY@ +1 NAME code: E2 (Unicode: acute, 0301)/acute accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: E3 (Unicode: circumflex, 0302)/circumflex accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: E4 (Unicode: tilde, 0303)/tilde/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: E5 (Unicode: macron, 0304)/macron/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: E6 (Unicode: breve, 0306)/breve/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: E7 (Unicode: dot above, 0307)/dot above/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: E8 (Unicode: diaeresis, 0308)/umlaut (dieresis)/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: E9 (Unicode: caron, 030C)/hacek/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: EA (Unicode: ring above, 030A)/circle above (angstrom)/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: EB (Unicode: ligature left half, FE20)/ligature, left half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: EC (Unicode: ligature right half, FE21)/ligature, right half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME 
code: ED (Unicode: comma above right, 0315)/high comma, off center/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: EE (Unicode: double acute, 030B)/double acute accent/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: EF (Unicode: candrabindu, 0310)/candrabindu/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: F0 (Unicode: cedilla, 0327)/cedilla/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: F1 (Unicode: ogonek, 0328)/right hook/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: F2 (Unicode: dot below, 0323)/dot below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: F3 (Unicode: diaeresis below, 0324)/double dot below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: F4 (Unicode: ring below, 0325)/circle below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: F5 (Unicode: double low line, 0333)/double underscore/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: F6 (Unicode: line below, 0332)/underscore/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: F7 (Unicode: comma below, 0326)/left hook/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 
NAME code: F8 (Unicode: left half ring below, 031C)/right cedilla/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: F9 (Unicode: breve below, 032E)/half circle below/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FA (Unicode: double tilde left half, FE22)/double tilde, left half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FB (Unicode: double tilde right half, FE23)/double tilde, right half/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE (Unicode: comma above, 0313)/high comma, centered/ +1 BIRT +2 PLAC ABCDEFGHIJKLMNOPQRSTUVWXYZ +1 DEAT +2 PLAC abcdefghijklmnopqrstuvwxyz +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL @CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/ANSEL_LF.gramps b/data/tests/ANSEL_LF.gramps new file mode 100644 index 000000000..83eb3fbb7 --- /dev/null +++ b/data/tests/ANSEL_LF.gramps @@ -0,0 +1,926 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic (not possible in ANSEL) + + + + + F + + greek (not possible in ANSEL) + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: E0 (Unicode: hook above, 0309) + low rising tone mark + + + + + + + U + + code: E1 (Unicode: grave, 0300) + grave accent + + + + + + + U + + code: E2 (Unicode: acute, 0301) + acute accent + + + + + + + U + + code: E3 (Unicode: circumflex, 0302) + circumflex accent + + + + + + + U + + code: E4 (Unicode: tilde, 0303) + tilde + + + + + + + U + + code: E5 (Unicode: macron, 0304) + macron + + + + + + + U + + code: E6 (Unicode: breve, 0306) + breve + + + + + + + U + + code: E7 (Unicode: dot above, 0307) + dot above + + + + + + + U + + code: E8 (Unicode: diaeresis, 0308) + umlaut (dieresis) + + + + + + + U + + code: E9 (Unicode: caron, 030C) + hacek + + + + + + + U + + code: EA (Unicode: ring above, 030A) + circle above (angstrom) + + + + + + + U + + code: EB (Unicode: ligature left half, 
FE20) + ligature, left half + + + + + + + U + + code: EC (Unicode: ligature right half, FE21) + ligature, right half + + + + + + + U + + code: ED (Unicode: comma above right, 0315) + high comma, off center + + + + + + + U + + code: EE (Unicode: double acute, 030B) + double acute accent + + + + + + + U + + code: EF (Unicode: candrabindu, 0310) + candrabindu + + + + + + + U + + code: F0 (Unicode: cedilla, 0327) + cedilla + + + + + + + U + + code: F1 (Unicode: ogonek, 0328) + right hook + + + + + + + U + + code: F2 (Unicode: dot below, 0323) + dot below + + + + + + + U + + code: F3 (Unicode: diaeresis below, 0324) + double dot below + + + + + + + U + + code: F4 (Unicode: ring below, 0325) + circle below + + + + + + + U + + code: F5 (Unicode: double low line, 0333) + double underscore + + + + + + + U + + code: F6 (Unicode: line below, 0332) + underscore + + + + + + + U + + code: F7 (Unicode: comma below, 0326) + left hook + + + + + + + U + + code: F8 (Unicode: left half ring below, 031C) + right cedilla + + + + + + + U + + code: F9 (Unicode: breve below, 032E) + half circle below + + + + + + + U + + code: FA (Unicode: double tilde left half, FE22) + double tilde, left half + + + + + + + U + + code: FB (Unicode: double tilde right half, FE23) + double tilde, right half + + + + + + + U + + code: FE (Unicode: comma above, 0313) + high comma, centered + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + slash l - uppercase (Ł), slash o - uppercase (Ø), slash d - uppercase (Đ), thorn - uppercase (Þ) + + + + ligature ae - uppercase (Æ), ligature oe - uppercase (Œ), miagkii znak (ʹ), middle dot (·), musical flat (♭) + + + + patent mark (®), plus-or-minus (±), hook o - uppercase (Ơ), hook u - uppercase (Ư) + + + + alif (ʼ), ayn (ʻ), slash l - lowercase (ł), slash o - lowercase (ø), slash d - lowercase (đ) + + + + thorn - lowercase (þ), ligature ae - lowercase (æ), ligature oe - lowercase (œ), tverdyi znak (ʺ) + + + + dotless i - 
lowercase (ı), british pound (£), eth (ð), hook o - lowercase (ơ), hook u - lowercase (ư) + + + + degree sign (°), script l (ℓ), phonograph copyright mark (℗), copyright symbol (©) + + + + musical sharp (♯), inverted question mark (¿), inverted exclamation mark (¡), es zet (ß) + + + + ẢB̉C̉D̉ẺF̉G̉H̉ỈJ̉K̉L̉M̉N̉ỎP̉Q̉R̉S̉T̉ỦV̉W̉X̉ỶZ̉ + + + + ảb̉c̉d̉ẻf̉g̉h̉ỉj̉k̉l̉m̉n̉ỏp̉q̉r̉s̉t̉ủv̉w̉x̉ỷz̉ + + + + ÀB̀C̀D̀ÈF̀G̀H̀ÌJ̀K̀L̀M̀ǸÒP̀Q̀R̀S̀T̀ÙV̀ẀX̀ỲZ̀ + + + + àb̀c̀d̀èf̀g̀h̀ìj̀k̀l̀m̀ǹòp̀q̀r̀s̀t̀ùv̀ẁx̀ỳz̀ + + + + ÁB́ĆD́ÉF́ǴH́ÍJ́ḰĹḾŃÓṔQ́ŔŚT́ÚV́ẂX́ÝŹ + + + + áb́ćd́éf́ǵh́íj́ḱĺḿńóṕq́ŕśt́úv́ẃx́ýź + + + + ÂB̂ĈD̂ÊF̂ĜĤÎĴK̂L̂M̂N̂ÔP̂Q̂R̂ŜT̂ÛV̂ŴX̂ŶẐ + + + + âb̂ĉd̂êf̂ĝĥîĵk̂l̂m̂n̂ôp̂q̂r̂ŝt̂ûv̂ŵx̂ŷẑ + + + + ÃB̃C̃D̃ẼF̃G̃H̃ĨJ̃K̃L̃M̃ÑÕP̃Q̃R̃S̃T̃ŨṼW̃X̃ỸZ̃ + + + + ãb̃c̃d̃ẽf̃g̃h̃ĩj̃k̃l̃m̃ñõp̃q̃r̃s̃t̃ũṽw̃x̃ỹz̃ + + + + ĀB̄C̄D̄ĒF̄ḠH̄ĪJ̄K̄L̄M̄N̄ŌP̄Q̄R̄S̄T̄ŪV̄W̄X̄ȲZ̄ + + + + āb̄c̄d̄ēf̄ḡh̄īj̄k̄l̄m̄n̄ōp̄q̄r̄s̄t̄ūv̄w̄x̄ȳz̄ + + + + ĂB̆C̆D̆ĔF̆ĞH̆ĬJ̆K̆L̆M̆N̆ŎP̆Q̆R̆S̆T̆ŬV̆W̆X̆Y̆Z̆ + + + + ăb̆c̆d̆ĕf̆ğh̆ĭj̆k̆l̆m̆n̆ŏp̆q̆r̆s̆t̆ŭv̆w̆x̆y̆z̆ + + + + ȦḂĊḊĖḞĠḢİJ̇K̇L̇ṀṄȮṖQ̇ṘṠṪU̇V̇ẆẊẎŻ + + + + ȧḃċḋėḟġḣi̇j̇k̇l̇ṁṅȯṗq̇ṙṡṫu̇v̇ẇẋẏż + + + + ÄB̈C̈D̈ËF̈G̈ḦÏJ̈K̈L̈M̈N̈ÖP̈Q̈R̈S̈T̈ÜV̈ẄẌŸZ̈ + + + + äb̈c̈d̈ëf̈g̈ḧïj̈k̈l̈m̈n̈öp̈q̈r̈s̈ẗüv̈ẅẍÿz̈ + + + + ǍB̌ČĎĚF̌ǦȞǏJ̌ǨĽM̌ŇǑP̌Q̌ŘŠŤǓV̌W̌X̌Y̌Ž + + + + ǎb̌čďěf̌ǧȟǐǰǩľm̌ňǒp̌q̌řšťǔv̌w̌x̌y̌ž + + + + ÅB̊C̊D̊E̊F̊G̊H̊I̊J̊K̊L̊M̊N̊O̊P̊Q̊R̊S̊T̊ŮV̊W̊X̊Y̊Z̊ + + + + åb̊c̊d̊e̊f̊g̊h̊i̊j̊k̊l̊m̊n̊o̊p̊q̊r̊s̊t̊ův̊ẘx̊ẙz̊ + + + + A︠B︠C︠D︠E︠F︠G︠H︠I︠J︠K︠L︠M︠N︠O︠P︠Q︠R︠S︠T︠U︠V︠W︠X︠Y︠Z︠ + + + + a︠b︠c︠d︠e︠f︠g︠h︠i︠j︠k︠l︠m︠n︠o︠p︠q︠r︠s︠t︠u︠v︠w︠x︠y︠z︠ + + + + A︡B︡C︡D︡E︡F︡G︡H︡I︡J︡K︡L︡M︡N︡O︡P︡Q︡R︡S︡T︡U︡V︡W︡X︡Y︡Z︡ + + + + a︡b︡c︡d︡e︡f︡g︡h︡i︡j︡k︡l︡m︡n︡o︡p︡q︡r︡s︡t︡u︡v︡w︡x︡y︡z︡ + + + + A̕B̕C̕D̕E̕F̕G̕H̕I̕J̕K̕L̕M̕N̕O̕P̕Q̕R̕S̕T̕U̕V̕W̕X̕Y̕Z̕ + + + + a̕b̕c̕d̕e̕f̕g̕h̕i̕j̕k̕l̕m̕n̕o̕p̕q̕r̕s̕t̕u̕v̕w̕x̕y̕z̕ + + + + A̋B̋C̋D̋E̋F̋G̋H̋I̋J̋K̋L̋M̋N̋ŐP̋Q̋R̋S̋T̋ŰV̋W̋X̋Y̋Z̋ + + + + a̋b̋c̋d̋e̋f̋g̋h̋i̋j̋k̋l̋m̋n̋őp̋q̋r̋s̋t̋űv̋w̋x̋y̋z̋ + + + + A̐B̐C̐D̐E̐F̐G̐H̐I̐J̐K̐L̐M̐N̐O̐P̐Q̐R̐S̐T̐U̐V̐W̐X̐Y̐Z̐ + + + + a̐b̐c̐d̐e̐f̐g̐h̐i̐j̐k̐l̐m̐n̐o̐p̐q̐r̐s̐t̐u̐v̐w̐x̐y̐z̐ 
+ + + + A̧B̧ÇḐȨF̧ĢḨI̧J̧ĶĻM̧ŅO̧P̧Q̧ŖŞŢU̧V̧W̧X̧Y̧Z̧ + + + + a̧b̧çḑȩf̧ģḩi̧j̧ķļm̧ņo̧p̧q̧ŗşţu̧v̧w̧x̧y̧z̧ + + + + ĄB̨C̨D̨ĘF̨G̨H̨ĮJ̨K̨L̨M̨N̨ǪP̨Q̨R̨S̨T̨ŲV̨W̨X̨Y̨Z̨ + + + + ąb̨c̨d̨ęf̨g̨h̨įj̨k̨l̨m̨n̨ǫp̨q̨r̨s̨t̨ųv̨w̨x̨y̨z̨ + + + + ẠḄC̣ḌẸF̣G̣ḤỊJ̣ḲḶṂṆỌP̣Q̣ṚṢṬỤṾẈX̣ỴẒ + + + + ạḅc̣ḍẹf̣g̣ḥịj̣ḳḷṃṇọp̣q̣ṛṣṭụṿẉx̣ỵẓ + + + + A̤B̤C̤D̤E̤F̤G̤H̤I̤J̤K̤L̤M̤N̤O̤P̤Q̤R̤S̤T̤ṲV̤W̤X̤Y̤Z̤ + + + + a̤b̤c̤d̤e̤f̤g̤h̤i̤j̤k̤l̤m̤n̤o̤p̤q̤r̤s̤t̤ṳv̤w̤x̤y̤z̤ + + + + ḀB̥C̥D̥E̥F̥G̥H̥I̥J̥K̥L̥M̥N̥O̥P̥Q̥R̥S̥T̥U̥V̥W̥X̥Y̥Z̥ + + + + ḁb̥c̥d̥e̥f̥g̥h̥i̥j̥k̥l̥m̥n̥o̥p̥q̥r̥s̥t̥u̥v̥w̥x̥y̥z̥ + + + + A̳B̳C̳D̳E̳F̳G̳H̳I̳J̳K̳L̳M̳N̳O̳P̳Q̳R̳S̳T̳U̳V̳W̳X̳Y̳Z̳ + + + + a̳b̳c̳d̳e̳f̳g̳h̳i̳j̳k̳l̳m̳n̳o̳p̳q̳r̳s̳t̳u̳v̳w̳x̳y̳z̳ + + + + A̲B̲C̲D̲E̲F̲G̲H̲I̲J̲K̲L̲M̲N̲O̲P̲Q̲R̲S̲T̲U̲V̲W̲X̲Y̲Z̲ + + + + a̲b̲c̲d̲e̲f̲g̲h̲i̲j̲k̲l̲m̲n̲o̲p̲q̲r̲s̲t̲u̲v̲w̲x̲y̲z̲ + + + + A̦B̦C̦D̦E̦F̦G̦H̦I̦J̦K̦L̦M̦N̦O̦P̦Q̦R̦ȘȚU̦V̦W̦X̦Y̦Z̦ + + + + a̦b̦c̦d̦e̦f̦g̦h̦i̦j̦k̦l̦m̦n̦o̦p̦q̦r̦șțu̦v̦w̦x̦y̦z̦ + + + + A̜B̜C̜D̜E̜F̜G̜H̜I̜J̜K̜L̜M̜N̜O̜P̜Q̜R̜S̜T̜U̜V̜W̜X̜Y̜Z̜ + + + + a̜b̜c̜d̜e̜f̜g̜h̜i̜j̜k̜l̜m̜n̜o̜p̜q̜r̜s̜t̜u̜v̜w̜x̜y̜z̜ + + + + A̮B̮C̮D̮E̮F̮G̮ḪI̮J̮K̮L̮M̮N̮O̮P̮Q̮R̮S̮T̮U̮V̮W̮X̮Y̮Z̮ + + + + a̮b̮c̮d̮e̮f̮g̮ḫi̮j̮k̮l̮m̮n̮o̮p̮q̮r̮s̮t̮u̮v̮w̮x̮y̮z̮ + + + + A︢B︢C︢D︢E︢F︢G︢H︢I︢J︢K︢L︢M︢N︢O︢P︢Q︢R︢S︢T︢U︢V︢W︢X︢Y︢Z︢ + + + + a︢b︢c︢d︢e︢f︢g︢h︢i︢j︢k︢l︢m︢n︢o︢p︢q︢r︢s︢t︢u︢v︢w︢x︢y︢z︢ + + + + A︣B︣C︣D︣E︣F︣G︣H︣I︣J︣K︣L︣M︣N︣O︣P︣Q︣R︣S︣T︣U︣V︣W︣X︣Y︣Z︣ + + + + a︣b︣c︣d︣e︣f︣g︣h︣i︣j︣k︣l︣m︣n︣o︣p︣q︣r︣s︣t︣u︣v︣w︣x︣y︣z︣ + + + + A̓B̓C̓D̓E̓F̓G̓H̓I̓J̓K̓L̓M̓N̓O̓P̓Q̓R̓S̓T̓U̓V̓W̓X̓Y̓Z̓ + + + + a̓b̓c̓d̓e̓f̓g̓h̓i̓j̓k̓l̓m̓n̓o̓p̓q̓r̓s̓t̓u̓v̓w̓x̓y̓z̓ + + + +
diff --git a/data/tests/Latin_1_CR.ged b/data/tests/Latin_1_CR.ged new file mode 100644 index 000000000..29854f65c --- /dev/null +++ b/data/tests/Latin_1_CR.ged @@ -0,0 +1 @@ +0 HEAD 1 SOUR NOTEPAD++ 1 DEST ANY 1 DATE 3 DEC 2010 2 TIME 7:03 1 GEDC 2 VERS 5.5 2 FORM LINEAGE-LINKED 1 LANG English 1 CHAR ASCII 0 @I1@ INDI 1 NAME Paul /Culley/ 1 SEX M 1 BIRT 2 DATE 1955 1 DEAT 2 DATE 2017 1 NOTE 2 CONT Table of Latin_1, ISO-8859-1 characters 2 CONT 32 [ ] 2 CONT 33 [!] 2 CONT 34 ["] 2 CONT 35 [#] 2 CONT 36 [$] 2 CONT 37 [%] 2 CONT 38 [&] 2 CONT 39 ['] 2 CONT 40 [(] 2 CONT 41 [)] 2 CONT 42 [*] 2 CONT 43 [+] 2 CONT 44 [,] 2 CONT 45 [-] 2 CONT 46 [.] 2 CONT 47 [/] 2 CONT 48 [0] 2 CONT 49 [1] 2 CONT 50 [2] 2 CONT 51 [3] 2 CONT 52 [4] 2 CONT 53 [5] 2 CONT 54 [6] 2 CONT 55 [7] 2 CONT 56 [8] 2 CONT 57 [9] 2 CONT 58 [:] 2 CONT 59 [;] 2 CONT 60 [<] 2 CONT 61 [=] 2 CONT 62 [>] 2 CONT 63 [?] 2 CONT 64 [@] 2 CONT 65 [A] 2 CONT 66 [B] 2 CONT 67 [C] 2 CONT 68 [D] 2 CONT 69 [E] 2 CONT 70 [F] 2 CONT 71 [G] 2 CONT 72 [H] 2 CONT 73 [I] 2 CONT 74 [J] 2 CONT 75 [K] 2 CONT 76 [L] 2 CONT 77 [M] 2 CONT 78 [N] 2 CONT 79 [O] 2 CONT 80 [P] 2 CONT 81 [Q] 2 CONT 82 [R] 2 CONT 83 [S] 2 CONT 84 [T] 2 CONT 85 [U] 2 CONT 86 [V] 2 CONT 87 [W] 2 CONT 88 [X] 2 CONT 89 [Y] 2 CONT 90 [Z] 2 CONT 91 [[] 2 CONT 92 [\] 2 CONT 93 []] 2 CONT 94 [^] 2 CONT 95 [_] 2 CONT 96 [`] 2 CONT 97 [a] 2 CONT 98 [b] 2 CONT 99 [c] 2 CONT 100 [d] 2 CONT 101 [e] 2 CONT 102 [f] 2 CONT 103 [g] 2 CONT 104 [h] 2 CONT 105 [i] 2 CONT 106 [j] 2 CONT 107 [k] 2 CONT 108 [l] 2 CONT 109 [m] 2 CONT 110 [n] 2 CONT 111 [o] 2 CONT 112 [p] 2 CONT 113 [q] 2 CONT 114 [r] 2 CONT 115 [s] 2 CONT 116 [t] 2 CONT 117 [u] 2 CONT 118 [v] 2 CONT 119 [w] 2 CONT 120 [x] 2 CONT 121 [y] 2 CONT 122 [z] 2 CONT 123 [{] 2 CONT 124 [|] 2 CONT 125 [}] 2 CONT 126 [~] 2 CONT 160 [] 2 CONT 161 [] 2 CONT 162 [] 2 CONT 163 [] 2 CONT 164 [] 2 CONT 165 [] 2 CONT 166 [] 2 CONT 167 [] 2 CONT 168 [] 2 CONT 169 [] 2 CONT 170 [] 2 CONT 171 [] 2 CONT 172 [] 2 CONT 173 [] 2 
CONT 174 [] 2 CONT 175 [] 2 CONT 176 [] 2 CONT 177 [] 2 CONT 178 [] 2 CONT 179 [] 2 CONT 180 [] 2 CONT 181 [] 2 CONT 182 [] 2 CONT 183 [] 2 CONT 184 [] 2 CONT 185 [] 2 CONT 186 [] 2 CONT 187 [] 2 CONT 188 [] 2 CONT 189 [] 2 CONT 190 [] 2 CONT 191 [] 2 CONT 192 [] 2 CONT 193 [] 2 CONT 194 [] 2 CONT 195 [] 2 CONT 196 [] 2 CONT 197 [] 2 CONT 198 [] 2 CONT 199 [] 2 CONT 200 [] 2 CONT 201 [] 2 CONT 202 [] 2 CONT 203 [] 2 CONT 204 [] 2 CONT 205 [] 2 CONT 206 [] 2 CONT 207 [] 2 CONT 208 [] 2 CONT 209 [] 2 CONT 210 [] 2 CONT 211 [] 2 CONT 212 [] 2 CONT 213 [] 2 CONT 214 [] 2 CONT 215 [] 2 CONT 216 [] 2 CONT 217 [] 2 CONT 218 [] 2 CONT 219 [] 2 CONT 220 [] 2 CONT 221 [] 2 CONT 222 [] 2 CONT 223 [] 2 CONT 224 [] 2 CONT 225 [] 2 CONT 226 [] 2 CONT 227 [] 2 CONT 228 [] 2 CONT 229 [] 2 CONT 230 [] 2 CONT 231 [] 2 CONT 232 [] 2 CONT 233 [] 2 CONT 234 [] 2 CONT 235 [] 2 CONT 236 [] 2 CONT 237 [] 2 CONT 238 [] 2 CONT 239 [] 2 CONT 240 [] 2 CONT 241 [] 2 CONT 242 [] 2 CONT 243 [] 2 CONT 244 [] 2 CONT 245 [] 2 CONT 246 [] 2 CONT 247 [] 2 CONT 248 [] 2 CONT 249 [] 2 CONT 250 [] 2 CONT 251 [] 2 CONT 252 [] 2 CONT 253 [] 2 CONT 254 [] 2 CONT 255 [] 0 TRLR \ No newline at end of file diff --git a/data/tests/Latin_1_CR.gramps b/data/tests/Latin_1_CR.gramps new file mode 100644 index 000000000..99d113d6c --- /dev/null +++ b/data/tests/Latin_1_CR.gramps @@ -0,0 +1,235 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + M + + Paul + Culley + + + + + + + + + Table of Latin_1, ISO-8859-1 characters +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/data/tests/Latin_1_CRLF.ged b/data/tests/Latin_1_CRLF.ged new file mode 100644 index 000000000..119b0197a --- /dev/null +++ b/data/tests/Latin_1_CRLF.ged @@ -0,0 +1,211 @@ +0 HEAD +1 SOUR NOTEPAD++ +1 DEST ANY +1 DATE 3 DEC 2010 +2 TIME 7:03 +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 LANG English +1 CHAR ASCII +0 @I1@ INDI +1 NAME Paul /Culley/ +1 SEX M +1 BIRT +2 DATE 1955 +1 DEAT +2 DATE 2017 +1 NOTE +2 CONT Table of Latin_1, ISO-8859-1 characters +2 CONT 32 [ ] +2 CONT 33 [!] +2 CONT 34 ["] +2 CONT 35 [#] +2 CONT 36 [$] +2 CONT 37 [%] +2 CONT 38 [&] +2 CONT 39 ['] +2 CONT 40 [(] +2 CONT 41 [)] +2 CONT 42 [*] +2 CONT 43 [+] +2 CONT 44 [,] +2 CONT 45 [-] +2 CONT 46 [.] +2 CONT 47 [/] +2 CONT 48 [0] +2 CONT 49 [1] +2 CONT 50 [2] +2 CONT 51 [3] +2 CONT 52 [4] +2 CONT 53 [5] +2 CONT 54 [6] +2 CONT 55 [7] +2 CONT 56 [8] +2 CONT 57 [9] +2 CONT 58 [:] +2 CONT 59 [;] +2 CONT 60 [<] +2 CONT 61 [=] +2 CONT 62 [>] +2 CONT 63 [?] +2 CONT 64 [@] +2 CONT 65 [A] +2 CONT 66 [B] +2 CONT 67 [C] +2 CONT 68 [D] +2 CONT 69 [E] +2 CONT 70 [F] +2 CONT 71 [G] +2 CONT 72 [H] +2 CONT 73 [I] +2 CONT 74 [J] +2 CONT 75 [K] +2 CONT 76 [L] +2 CONT 77 [M] +2 CONT 78 [N] +2 CONT 79 [O] +2 CONT 80 [P] +2 CONT 81 [Q] +2 CONT 82 [R] +2 CONT 83 [S] +2 CONT 84 [T] +2 CONT 85 [U] +2 CONT 86 [V] +2 CONT 87 [W] +2 CONT 88 [X] +2 CONT 89 [Y] +2 CONT 90 [Z] +2 CONT 91 [[] +2 CONT 92 [\] +2 CONT 93 []] +2 CONT 94 [^] +2 CONT 95 [_] +2 CONT 96 [`] +2 CONT 97 [a] +2 CONT 98 [b] +2 CONT 99 [c] +2 CONT 100 [d] +2 CONT 101 [e] +2 CONT 102 [f] +2 CONT 103 [g] +2 CONT 104 [h] +2 CONT 105 [i] +2 CONT 106 [j] +2 CONT 107 [k] +2 CONT 108 [l] +2 CONT 109 [m] +2 CONT 110 [n] +2 CONT 111 [o] +2 CONT 112 [p] +2 CONT 113 [q] +2 CONT 114 [r] +2 CONT 115 [s] +2 CONT 116 [t] +2 CONT 117 [u] +2 CONT 118 [v] +2 CONT 119 [w] +2 CONT 120 [x] +2 CONT 121 [y] +2 CONT 122 [z] +2 CONT 123 [{] +2 CONT 124 [|] +2 CONT 125 [}] +2 CONT 126 [~] +2 CONT 160 [] +2 CONT 161 [] +2 CONT 162 [] +2 CONT 163 [] +2 CONT 164 [] +2 
CONT 165 [] +2 CONT 166 [] +2 CONT 167 [] +2 CONT 168 [] +2 CONT 169 [] +2 CONT 170 [] +2 CONT 171 [] +2 CONT 172 [] +2 CONT 173 [] +2 CONT 174 [] +2 CONT 175 [] +2 CONT 176 [] +2 CONT 177 [] +2 CONT 178 [] +2 CONT 179 [] +2 CONT 180 [] +2 CONT 181 [] +2 CONT 182 [] +2 CONT 183 [] +2 CONT 184 [] +2 CONT 185 [] +2 CONT 186 [] +2 CONT 187 [] +2 CONT 188 [] +2 CONT 189 [] +2 CONT 190 [] +2 CONT 191 [] +2 CONT 192 [] +2 CONT 193 [] +2 CONT 194 [] +2 CONT 195 [] +2 CONT 196 [] +2 CONT 197 [] +2 CONT 198 [] +2 CONT 199 [] +2 CONT 200 [] +2 CONT 201 [] +2 CONT 202 [] +2 CONT 203 [] +2 CONT 204 [] +2 CONT 205 [] +2 CONT 206 [] +2 CONT 207 [] +2 CONT 208 [] +2 CONT 209 [] +2 CONT 210 [] +2 CONT 211 [] +2 CONT 212 [] +2 CONT 213 [] +2 CONT 214 [] +2 CONT 215 [] +2 CONT 216 [] +2 CONT 217 [] +2 CONT 218 [] +2 CONT 219 [] +2 CONT 220 [] +2 CONT 221 [] +2 CONT 222 [] +2 CONT 223 [] +2 CONT 224 [] +2 CONT 225 [] +2 CONT 226 [] +2 CONT 227 [] +2 CONT 228 [] +2 CONT 229 [] +2 CONT 230 [] +2 CONT 231 [] +2 CONT 232 [] +2 CONT 233 [] +2 CONT 234 [] +2 CONT 235 [] +2 CONT 236 [] +2 CONT 237 [] +2 CONT 238 [] +2 CONT 239 [] +2 CONT 240 [] +2 CONT 241 [] +2 CONT 242 [] +2 CONT 243 [] +2 CONT 244 [] +2 CONT 245 [] +2 CONT 246 [] +2 CONT 247 [] +2 CONT 248 [] +2 CONT 249 [] +2 CONT 250 [] +2 CONT 251 [] +2 CONT 252 [] +2 CONT 253 [] +2 CONT 254 [] +2 CONT 255 [] +0 TRLR diff --git a/data/tests/Latin_1_CRLF.gramps b/data/tests/Latin_1_CRLF.gramps new file mode 100644 index 000000000..99d113d6c --- /dev/null +++ b/data/tests/Latin_1_CRLF.gramps @@ -0,0 +1,235 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + M + + Paul + Culley + + + + + + + + + Table of Latin_1, ISO-8859-1 characters +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/data/tests/Latin_1_LF.ged b/data/tests/Latin_1_LF.ged new file mode 100644 index 000000000..7b2c3c8f7 --- /dev/null +++ b/data/tests/Latin_1_LF.ged @@ -0,0 +1,211 @@ +0 HEAD +1 SOUR NOTEPAD++ +1 DEST ANY +1 DATE 3 DEC 2010 +2 TIME 7:03 +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 LANG English +1 CHAR ASCII +0 @I1@ INDI +1 NAME Paul /Culley/ +1 SEX M +1 BIRT +2 DATE 1955 +1 DEAT +2 DATE 2017 +1 NOTE +2 CONT Table of Latin_1, ISO-8859-1 characters +2 CONT 32 [ ] +2 CONT 33 [!] +2 CONT 34 ["] +2 CONT 35 [#] +2 CONT 36 [$] +2 CONT 37 [%] +2 CONT 38 [&] +2 CONT 39 ['] +2 CONT 40 [(] +2 CONT 41 [)] +2 CONT 42 [*] +2 CONT 43 [+] +2 CONT 44 [,] +2 CONT 45 [-] +2 CONT 46 [.] +2 CONT 47 [/] +2 CONT 48 [0] +2 CONT 49 [1] +2 CONT 50 [2] +2 CONT 51 [3] +2 CONT 52 [4] +2 CONT 53 [5] +2 CONT 54 [6] +2 CONT 55 [7] +2 CONT 56 [8] +2 CONT 57 [9] +2 CONT 58 [:] +2 CONT 59 [;] +2 CONT 60 [<] +2 CONT 61 [=] +2 CONT 62 [>] +2 CONT 63 [?] +2 CONT 64 [@] +2 CONT 65 [A] +2 CONT 66 [B] +2 CONT 67 [C] +2 CONT 68 [D] +2 CONT 69 [E] +2 CONT 70 [F] +2 CONT 71 [G] +2 CONT 72 [H] +2 CONT 73 [I] +2 CONT 74 [J] +2 CONT 75 [K] +2 CONT 76 [L] +2 CONT 77 [M] +2 CONT 78 [N] +2 CONT 79 [O] +2 CONT 80 [P] +2 CONT 81 [Q] +2 CONT 82 [R] +2 CONT 83 [S] +2 CONT 84 [T] +2 CONT 85 [U] +2 CONT 86 [V] +2 CONT 87 [W] +2 CONT 88 [X] +2 CONT 89 [Y] +2 CONT 90 [Z] +2 CONT 91 [[] +2 CONT 92 [\] +2 CONT 93 []] +2 CONT 94 [^] +2 CONT 95 [_] +2 CONT 96 [`] +2 CONT 97 [a] +2 CONT 98 [b] +2 CONT 99 [c] +2 CONT 100 [d] +2 CONT 101 [e] +2 CONT 102 [f] +2 CONT 103 [g] +2 CONT 104 [h] +2 CONT 105 [i] +2 CONT 106 [j] +2 CONT 107 [k] +2 CONT 108 [l] +2 CONT 109 [m] +2 CONT 110 [n] +2 CONT 111 [o] +2 CONT 112 [p] +2 CONT 113 [q] +2 CONT 114 [r] +2 CONT 115 [s] +2 CONT 116 [t] +2 CONT 117 [u] +2 CONT 118 [v] +2 CONT 119 [w] +2 CONT 120 [x] +2 CONT 121 [y] +2 CONT 122 [z] +2 CONT 123 [{] +2 CONT 124 [|] +2 CONT 125 [}] +2 CONT 126 [~] +2 CONT 160 [] +2 CONT 161 [] +2 CONT 162 [] +2 CONT 163 [] +2 CONT 164 [] +2 CONT 
165 [] +2 CONT 166 [] +2 CONT 167 [] +2 CONT 168 [] +2 CONT 169 [] +2 CONT 170 [] +2 CONT 171 [] +2 CONT 172 [] +2 CONT 173 [] +2 CONT 174 [] +2 CONT 175 [] +2 CONT 176 [] +2 CONT 177 [] +2 CONT 178 [] +2 CONT 179 [] +2 CONT 180 [] +2 CONT 181 [] +2 CONT 182 [] +2 CONT 183 [] +2 CONT 184 [] +2 CONT 185 [] +2 CONT 186 [] +2 CONT 187 [] +2 CONT 188 [] +2 CONT 189 [] +2 CONT 190 [] +2 CONT 191 [] +2 CONT 192 [] +2 CONT 193 [] +2 CONT 194 [] +2 CONT 195 [] +2 CONT 196 [] +2 CONT 197 [] +2 CONT 198 [] +2 CONT 199 [] +2 CONT 200 [] +2 CONT 201 [] +2 CONT 202 [] +2 CONT 203 [] +2 CONT 204 [] +2 CONT 205 [] +2 CONT 206 [] +2 CONT 207 [] +2 CONT 208 [] +2 CONT 209 [] +2 CONT 210 [] +2 CONT 211 [] +2 CONT 212 [] +2 CONT 213 [] +2 CONT 214 [] +2 CONT 215 [] +2 CONT 216 [] +2 CONT 217 [] +2 CONT 218 [] +2 CONT 219 [] +2 CONT 220 [] +2 CONT 221 [] +2 CONT 222 [] +2 CONT 223 [] +2 CONT 224 [] +2 CONT 225 [] +2 CONT 226 [] +2 CONT 227 [] +2 CONT 228 [] +2 CONT 229 [] +2 CONT 230 [] +2 CONT 231 [] +2 CONT 232 [] +2 CONT 233 [] +2 CONT 234 [] +2 CONT 235 [] +2 CONT 236 [] +2 CONT 237 [] +2 CONT 238 [] +2 CONT 239 [] +2 CONT 240 [] +2 CONT 241 [] +2 CONT 242 [] +2 CONT 243 [] +2 CONT 244 [] +2 CONT 245 [] +2 CONT 246 [] +2 CONT 247 [] +2 CONT 248 [] +2 CONT 249 [] +2 CONT 250 [] +2 CONT 251 [] +2 CONT 252 [] +2 CONT 253 [] +2 CONT 254 [] +2 CONT 255 [] +0 TRLR diff --git a/data/tests/Latin_1_LF.gramps b/data/tests/Latin_1_LF.gramps new file mode 100644 index 000000000..99d113d6c --- /dev/null +++ b/data/tests/Latin_1_LF.gramps @@ -0,0 +1,235 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + M + + Paul + Culley + + + + + + + + + Table of Latin_1, ISO-8859-1 characters +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/data/tests/UTF_16_BE_BOM_CR.GED b/data/tests/UTF_16_BE_BOM_CR.GED new file mode 100644 index 000000000..0727a3662 Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_CR.GED differ diff --git a/data/tests/UTF_16_BE_BOM_CR.gramps b/data/tests/UTF_16_BE_BOM_CR.gramps new file mode 100644 index 000000000..a3c1557ba --- /dev/null +++ b/data/tests/UTF_16_BE_BOM_CR.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_16_BE_BOM_CRLF.GED b/data/tests/UTF_16_BE_BOM_CRLF.GED new file mode 100644 index 000000000..27b4a3def Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_CRLF.GED differ diff --git a/data/tests/UTF_16_BE_BOM_CRLF.gramps b/data/tests/UTF_16_BE_BOM_CRLF.gramps new file mode 100644 index 000000000..c7be9f2c9 --- /dev/null +++ b/data/tests/UTF_16_BE_BOM_CRLF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_16_BE_BOM_LF.GED b/data/tests/UTF_16_BE_BOM_LF.GED new file mode 100644 index 000000000..caca0ca38 Binary files /dev/null and b/data/tests/UTF_16_BE_BOM_LF.GED differ diff --git a/data/tests/UTF_16_BE_BOM_LF.gramps b/data/tests/UTF_16_BE_BOM_LF.gramps new file mode 100644 index 000000000..530e0ff9b --- /dev/null +++ b/data/tests/UTF_16_BE_BOM_LF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_16_LE_BOM_CR.GED b/data/tests/UTF_16_LE_BOM_CR.GED new file mode 100644 index 000000000..60dcf91ec Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_CR.GED differ diff --git a/data/tests/UTF_16_LE_BOM_CR.gramps b/data/tests/UTF_16_LE_BOM_CR.gramps new file mode 100644 index 000000000..e9fe94f22 --- /dev/null +++ b/data/tests/UTF_16_LE_BOM_CR.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_16_LE_BOM_CRLF.GED b/data/tests/UTF_16_LE_BOM_CRLF.GED new file mode 100644 index 000000000..59e701e3d Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_CRLF.GED differ diff --git a/data/tests/UTF_16_LE_BOM_CRLF.gramps b/data/tests/UTF_16_LE_BOM_CRLF.gramps new file mode 100644 index 000000000..1571e15c7 --- /dev/null +++ b/data/tests/UTF_16_LE_BOM_CRLF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_16_LE_BOM_LF.GED b/data/tests/UTF_16_LE_BOM_LF.GED new file mode 100644 index 000000000..62b6ce727 Binary files /dev/null and b/data/tests/UTF_16_LE_BOM_LF.GED differ diff --git a/data/tests/UTF_16_LE_BOM_LF.gramps b/data/tests/UTF_16_LE_BOM_LF.gramps new file mode 100644 index 000000000..2718c107e --- /dev/null +++ b/data/tests/UTF_16_LE_BOM_LF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_BOM_CR.ged b/data/tests/UTF_8_BOM_CR.ged new file mode 100644 index 000000000..5691e0fef --- /dev/null +++ b/data/tests/UTF_8_BOM_CR.ged @@ -0,0 +1 @@ +0 HEAD 1 CHAR UTF-8 1 SOUR REGISTERED_SOURCE_NAME 1 GEDC 2 VERS 5.5 2 FORM LINEAGE-LINKED 1 NOTE UTF-8 transmission test. 2 CONT The transmission does start with a byte order mark (BOM) 2 CONT Each line is terminated using carriage return. 2 CONT This GEDCOM transmission contains a charcter set test. It consists 2 CONT of a single family (two parents, many children). The parents are used 2 CONT to test the cyrillic and greek letters. In both 'persons' the 2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some 2 CONT small letters of alphabet. 2 CONT The children contain some combined letters and special charcters. 2 CONT The NAME tag of each 'person' is the name of the characters tested 2 CONT within the person. 2 CONT The first children contain some special characters. Here the strings 2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' 2 CONT where 'character name'is the name of the character (like 'british pound') 2 CONT and 'test character' is a single byte representing this character 2 CONT in ANSEL. 2 CONT The last children contain some combined characters. The name tag gives 2 CONT the name of the non-spacing character tested within the 'person'. 2 CONT Within the name the hex-values of the non-spacing character is given 2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are 2 CONT combined with the non-spacing character tested here and which have 2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters 2 CONT without the non-spacing part. 2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC 2 CONT tag contains all latin letters which have a UNICODE code point if 2 CONT combined with a ring above. The DEAT.PLAC tag contain the same 2 CONT charcters combined with this ring. 
2 CONT Note: Not all charcters can be displayed on all computers. 2 CONT This strongly depends on the installed fonts and codepages. 2 CONT This file based on the following source: 2 CONT www.unicode.org delivered the connection from the code point names 2 CONT to the actual values. Note, that much more UNICODE characters are 2 CONT possible (like the chinese alphabet). 1 SUBM @SUBMITTER@ 1 DATE 20 JAN 1998 0 @SUBMITTER@ SUBM 1 NAME /H. Eichmann/ 1 ADDR email: h.eichmann@@gmx.de 0 @FATHER@ INDI 1 NAME /cyrillic/ 1 BIRT 2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ 1 DEAT 2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя 1 SEX M 1 FAMS @FAMILY@ 0 @MOTHER@ INDI 1 NAME /greek/ 1 BIRT 2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ 1 DEAT 2 PLAC αβγδεζηθικλμνξοπρςστυφχψω 1 SEX F 1 FAMS @FAMILY@ 0 @CHILD0@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 0/ 1 BIRT 2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) 1 DEAT 2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) 0 @CHILD1@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 1/ 1 BIRT 2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) 1 DEAT 2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) 0 @CHILD2@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 2/ 1 BIRT 2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) 1 DEAT 2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) 0 @CHILD3@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 3/ 1 BIRT 2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) 1 DEAT 2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) 0 @CHILD4@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0309/HOOK ABOVE/ 1 BIRT 2 PLAC 
AEIOU,Yaeio,uy 1 DEAT 2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ 0 @CHILD5@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0300/GRAVE/ 1 BIRT 2 PLAC AEIOU,WYaei,ouwy 1 DEAT 2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ 0 @CHILD6@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0301/ACUTE/ 1 BIRT 2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz 1 DEAT 2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź 0 @CHILD7@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0302/CIRCUMFLEX/ 1 BIRT 2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z 1 DEAT 2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ 0 @CHILD8@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0303/TILDE/ 1 BIRT 2 PLAC AEINO,UVYae,inouv,y 1 DEAT 2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ 0 @CHILD9@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0304/MACRON/ 1 BIRT 2 PLAC AEGIO,Uaegi,ou 1 DEAT 2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū 0 @CHILD10@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0306/BREVE/ 1 BIRT 2 PLAC AEGIO,Uaegi,ou 1 DEAT 2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ 0 @CHILD11@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0307/DOT ABOVE/ 1 BIRT 2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz 1 DEAT 2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż 0 @CHILD12@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0308/DIAERESIS/ 1 BIRT 2 PLAC AEHIO,UWXYa,ehiot,uwxy 1 DEAT 2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ 0 @CHILD13@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030C/CARON/ 1 BIRT 2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z 1 DEAT 2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž 0 @CHILD14@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030A/RING ABOVE/ 1 BIRT 2 PLAC AUauw,y 1 DEAT 2 PLAC ÅŮåůẘ,ẙ 0 @CHILD15@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE20/LIGATURE LEFT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD16@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE21/LIGATURE RIGHT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD17@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0315/COMMA ABOVE RIGHT/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD18@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030B/DOUBLE ACUTE/ 1 BIRT 2 PLAC OUou 1 DEAT 2 PLAC ŐŰőű 0 @CHILD19@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0310/CANDRABINDU/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD20@ 
INDI 1 FAMC @FAMILY@ 1 NAME code: 0327/CEDILLA/ 1 BIRT 2 PLAC CDGHK,LNRST,cdghk,lnrst 1 DEAT 2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ 0 @CHILD21@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0328/OGONEK/ 1 BIRT 2 PLAC AEIOU,aeiou 1 DEAT 2 PLAC ĄĘĮǪŲ,ąęįǫų 0 @CHILD22@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0323/DOT BELOW/ 1 BIRT 2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz 1 DEAT 2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ 0 @CHILD23@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0324/DIAERESIS BELOW/ 1 BIRT 2 PLAC Uu 1 DEAT 2 PLAC Ṳṳ 0 @CHILD24@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0325/RING BELOW/ 1 BIRT 2 PLAC Aa 1 DEAT 2 PLAC Ḁḁ 0 @CHILD25@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0333/DOUBLE LOW LINE/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD26@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0332/LINE BELOW/ 1 BIRT 2 PLAC BDKLN,RTZbd,hklnr,tz 1 DEAT 2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ 0 @CHILD27@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0326/COMMA BELOW/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD28@ INDI 1 FAMC @FAMILY@ 1 NAME code: 031C/LEFT HALF RING BELOW/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD29@ INDI 1 FAMC @FAMILY@ 1 NAME code: 032E/BREVE BELOW/ 1 BIRT 2 PLAC Hh 1 DEAT 2 PLAC Ḫḫ 0 @CHILD30@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD31@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD32@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0313/COMMA ABOVE/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @FAMILY@ FAM 1 HUSB @FATHER@ 1 WIFE @MOTHER@ 1 CHIL @CHILD0@ 1 CHIL @CHILD1@ 1 CHIL @CHILD2@ 1 CHIL @CHILD3@ 1 CHIL @CHILD4@ 1 CHIL @CHILD5@ 1 CHIL @CHILD6@ 1 CHIL @CHILD7@ 1 CHIL @CHILD8@ 1 CHIL @CHILD9@ 1 CHIL @CHILD10@ 1 CHIL @CHILD11@ 1 CHIL @CHILD12@ 1 CHIL @CHILD13@ 1 CHIL @CHILD14@ 1 CHIL @CHILD15@ 1 CHIL @CHILD16@ 1 CHIL @CHILD17@ 1 CHIL @CHILD18@ 1 CHIL @CHILD19@ 1 CHIL @CHILD20@ 1 CHIL @CHILD21@ 1 CHIL @CHILD22@ 1 CHIL @CHILD23@ 1 CHIL 
@CHILD24@ 1 CHIL @CHILD25@ 1 CHIL @CHILD26@ 1 CHIL @CHILD27@ 1 CHIL @CHILD28@ 1 CHIL @CHILD29@ 1 CHIL @CHILD30@ 1 CHIL @CHILD31@ 1 CHIL @CHILD32@ 0 TRLR \ No newline at end of file diff --git a/data/tests/UTF_8_BOM_CR.gramps b/data/tests/UTF_8_BOM_CR.gramps new file mode 100644 index 000000000..f033f56b5 --- /dev/null +++ b/data/tests/UTF_8_BOM_CR.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_BOM_CRLF.GED b/data/tests/UTF_8_BOM_CRLF.GED new file mode 100644 index 000000000..2034e0ec8 --- /dev/null +++ b/data/tests/UTF_8_BOM_CRLF.GED @@ -0,0 +1,328 @@ +0 HEAD +1 CHAR UTF-8 +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE UTF-8 transmission test. +2 CONT The transmission does start with a byte order mark (BOM) +2 CONT Each line is terminated using carriage return + line feed. +2 CONT This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are used +2 CONT to test the cyrillic and greek letters. In both 'persons' the +2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +2 CONT small letters of alphabet. +2 CONT The children contain some combined letters and special charcters. +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. +2 CONT The first children contain some special characters. Here the strings +2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +2 CONT where 'character name'is the name of the character (like 'british pound') +2 CONT and 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain some combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are +2 CONT combined with the non-spacing character tested here and which have +2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters +2 CONT without the non-spacing part. +2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC +2 CONT tag contains all latin letters which have a UNICODE code point if +2 CONT combined with a ring above. 
The DEAT.PLAC tag contain the same +2 CONT charcters combined with this ring. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT This file based on the following source: +2 CONT www.unicode.org delivered the connection from the code point names +2 CONT to the actual values. Note, that much more UNICODE characters are +2 CONT possible (like the chinese alphabet). +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic/ +1 BIRT +2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +1 DEAT +2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek/ +1 BIRT +2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +1 DEAT +2 PLAC αβγδεζηθικλμνξοπρςστυφχψω +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) +1 DEAT +2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) +1 DEAT +2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) +1 DEAT +2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) +1 DEAT +2 PLAC music sharp sign (♯), inverted question mark (¿), 
inverted exclamation mark (¡), small sharp S (ß) +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0309/HOOK ABOVE/ +1 BIRT +2 PLAC AEIOU,Yaeio,uy +1 DEAT +2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0300/GRAVE/ +1 BIRT +2 PLAC AEIOU,WYaei,ouwy +1 DEAT +2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ +0 @CHILD6@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0301/ACUTE/ +1 BIRT +2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz +1 DEAT +2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0302/CIRCUMFLEX/ +1 BIRT +2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z +1 DEAT +2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0303/TILDE/ +1 BIRT +2 PLAC AEINO,UVYae,inouv,y +1 DEAT +2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0304/MACRON/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0306/BREVE/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0307/DOT ABOVE/ +1 BIRT +2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz +1 DEAT +2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0308/DIAERESIS/ +1 BIRT +2 PLAC AEHIO,UWXYa,ehiot,uwxy +1 DEAT +2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030C/CARON/ +1 BIRT +2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z +1 DEAT +2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030A/RING ABOVE/ +1 BIRT +2 PLAC AUauw,y +1 DEAT +2 PLAC ÅŮåůẘ,ẙ +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE20/LIGATURE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE21/LIGATURE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0315/COMMA ABOVE RIGHT/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 
@CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030B/DOUBLE ACUTE/ +1 BIRT +2 PLAC OUou +1 DEAT +2 PLAC ŐŰőű +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0310/CANDRABINDU/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0327/CEDILLA/ +1 BIRT +2 PLAC CDGHK,LNRST,cdghk,lnrst +1 DEAT +2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0328/OGONEK/ +1 BIRT +2 PLAC AEIOU,aeiou +1 DEAT +2 PLAC ĄĘĮǪŲ,ąęįǫų +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0323/DOT BELOW/ +1 BIRT +2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz +1 DEAT +2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0324/DIAERESIS BELOW/ +1 BIRT +2 PLAC Uu +1 DEAT +2 PLAC Ṳṳ +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0325/RING BELOW/ +1 BIRT +2 PLAC Aa +1 DEAT +2 PLAC Ḁḁ +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0333/DOUBLE LOW LINE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0332/LINE BELOW/ +1 BIRT +2 PLAC BDKLN,RTZbd,hklnr,tz +1 DEAT +2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0326/COMMA BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 NAME code: 031C/LEFT HALF RING BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: 032E/BREVE BELOW/ +1 BIRT +2 PLAC Hh +1 DEAT +2 PLAC Ḫḫ +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0313/COMMA ABOVE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL 
@CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/UTF_8_BOM_CRLF.gramps b/data/tests/UTF_8_BOM_CRLF.gramps new file mode 100644 index 000000000..4a49e4f9a --- /dev/null +++ b/data/tests/UTF_8_BOM_CRLF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_BOM_LF.GED b/data/tests/UTF_8_BOM_LF.GED new file mode 100644 index 000000000..4ddb63104 --- /dev/null +++ b/data/tests/UTF_8_BOM_LF.GED @@ -0,0 +1,328 @@ +0 HEAD +1 CHAR UTF-8 +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE UTF-8 transmission test. +2 CONT The transmission does start with a byte order mark (BOM) +2 CONT Each line is terminated using line feed. +2 CONT This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are used +2 CONT to test the cyrillic and greek letters. In both 'persons' the +2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +2 CONT small letters of alphabet. +2 CONT The children contain some combined letters and special charcters. +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. +2 CONT The first children contain some special characters. Here the strings +2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +2 CONT where 'character name'is the name of the character (like 'british pound') +2 CONT and 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain some combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are +2 CONT combined with the non-spacing character tested here and which have +2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters +2 CONT without the non-spacing part. +2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC +2 CONT tag contains all latin letters which have a UNICODE code point if +2 CONT combined with a ring above. 
The DEAT.PLAC tag contain the same +2 CONT charcters combined with this ring. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT This file based on the following source: +2 CONT www.unicode.org delivered the connection from the code point names +2 CONT to the actual values. Note, that much more UNICODE characters are +2 CONT possible (like the chinese alphabet). +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic/ +1 BIRT +2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +1 DEAT +2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek/ +1 BIRT +2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +1 DEAT +2 PLAC αβγδεζηθικλμνξοπρςστυφχψω +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) +1 DEAT +2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) +1 DEAT +2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) +1 DEAT +2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) +1 DEAT +2 PLAC music sharp sign (♯), inverted question mark (¿), 
inverted exclamation mark (¡), small sharp S (ß) +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0309/HOOK ABOVE/ +1 BIRT +2 PLAC AEIOU,Yaeio,uy +1 DEAT +2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0300/GRAVE/ +1 BIRT +2 PLAC AEIOU,WYaei,ouwy +1 DEAT +2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ +0 @CHILD6@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0301/ACUTE/ +1 BIRT +2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz +1 DEAT +2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0302/CIRCUMFLEX/ +1 BIRT +2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z +1 DEAT +2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0303/TILDE/ +1 BIRT +2 PLAC AEINO,UVYae,inouv,y +1 DEAT +2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0304/MACRON/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0306/BREVE/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0307/DOT ABOVE/ +1 BIRT +2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz +1 DEAT +2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0308/DIAERESIS/ +1 BIRT +2 PLAC AEHIO,UWXYa,ehiot,uwxy +1 DEAT +2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030C/CARON/ +1 BIRT +2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z +1 DEAT +2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030A/RING ABOVE/ +1 BIRT +2 PLAC AUauw,y +1 DEAT +2 PLAC ÅŮåůẘ,ẙ +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE20/LIGATURE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE21/LIGATURE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0315/COMMA ABOVE RIGHT/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 
@CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030B/DOUBLE ACUTE/ +1 BIRT +2 PLAC OUou +1 DEAT +2 PLAC ŐŰőű +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0310/CANDRABINDU/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0327/CEDILLA/ +1 BIRT +2 PLAC CDGHK,LNRST,cdghk,lnrst +1 DEAT +2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0328/OGONEK/ +1 BIRT +2 PLAC AEIOU,aeiou +1 DEAT +2 PLAC ĄĘĮǪŲ,ąęįǫų +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0323/DOT BELOW/ +1 BIRT +2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz +1 DEAT +2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0324/DIAERESIS BELOW/ +1 BIRT +2 PLAC Uu +1 DEAT +2 PLAC Ṳṳ +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0325/RING BELOW/ +1 BIRT +2 PLAC Aa +1 DEAT +2 PLAC Ḁḁ +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0333/DOUBLE LOW LINE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0332/LINE BELOW/ +1 BIRT +2 PLAC BDKLN,RTZbd,hklnr,tz +1 DEAT +2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0326/COMMA BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 NAME code: 031C/LEFT HALF RING BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: 032E/BREVE BELOW/ +1 BIRT +2 PLAC Hh +1 DEAT +2 PLAC Ḫḫ +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0313/COMMA ABOVE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL 
@CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/UTF_8_BOM_LF.gramps b/data/tests/UTF_8_BOM_LF.gramps new file mode 100644 index 000000000..f37368e37 --- /dev/null +++ b/data/tests/UTF_8_BOM_LF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_NOBOM_CR.GED b/data/tests/UTF_8_NOBOM_CR.GED new file mode 100644 index 000000000..fa788206f --- /dev/null +++ b/data/tests/UTF_8_NOBOM_CR.GED @@ -0,0 +1 @@ +0 HEAD 1 CHAR UTF-8 1 SOUR REGISTERED_SOURCE_NAME 1 GEDC 2 VERS 5.5 2 FORM LINEAGE-LINKED 1 NOTE UTF-8 transmission test. 2 CONT The transmission does NOT start with a byte order mark (BOM) 2 CONT Each line is terminated using carriage return. 2 CONT This GEDCOM transmission contains a charcter set test. It consists 2 CONT of a single family (two parents, many children). The parents are used 2 CONT to test the cyrillic and greek letters. In both 'persons' the 2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some 2 CONT small letters of alphabet. 2 CONT The children contain some combined letters and special charcters. 2 CONT The NAME tag of each 'person' is the name of the characters tested 2 CONT within the person. 2 CONT The first children contain some special characters. Here the strings 2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' 2 CONT where 'character name'is the name of the character (like 'british pound') 2 CONT and 'test character' is a single byte representing this character 2 CONT in ANSEL. 2 CONT The last children contain some combined characters. The name tag gives 2 CONT the name of the non-spacing character tested within the 'person'. 2 CONT Within the name the hex-values of the non-spacing character is given 2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are 2 CONT combined with the non-spacing character tested here and which have 2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters 2 CONT without the non-spacing part. 2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC 2 CONT tag contains all latin letters which have a UNICODE code point if 2 CONT combined with a ring above. The DEAT.PLAC tag contain the same 2 CONT charcters combined with this ring. 
2 CONT Note: Not all charcters can be displayed on all computers. 2 CONT This strongly depends on the installed fonts and codepages. 2 CONT This file based on the following source: 2 CONT www.unicode.org delivered the connection from the code point names 2 CONT to the actual values. Note, that much more UNICODE characters are 2 CONT possible (like the chinese alphabet). 1 SUBM @SUBMITTER@ 1 DATE 20 JAN 1998 0 @SUBMITTER@ SUBM 1 NAME /H. Eichmann/ 1 ADDR email: h.eichmann@@gmx.de 0 @FATHER@ INDI 1 NAME /cyrillic/ 1 BIRT 2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ 1 DEAT 2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя 1 SEX M 1 FAMS @FAMILY@ 0 @MOTHER@ INDI 1 NAME /greek/ 1 BIRT 2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ 1 DEAT 2 PLAC αβγδεζηθικλμνξοπρςστυφχψω 1 SEX F 1 FAMS @FAMILY@ 0 @CHILD0@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 0/ 1 BIRT 2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) 1 DEAT 2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) 0 @CHILD1@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 1/ 1 BIRT 2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) 1 DEAT 2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) 0 @CHILD2@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 2/ 1 BIRT 2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) 1 DEAT 2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) 0 @CHILD3@ INDI 1 FAMC @FAMILY@ 1 NAME /Special Characters 3/ 1 BIRT 2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) 1 DEAT 2 PLAC music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) 0 @CHILD4@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0309/HOOK ABOVE/ 1 BIRT 2 PLAC 
AEIOU,Yaeio,uy 1 DEAT 2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ 0 @CHILD5@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0300/GRAVE/ 1 BIRT 2 PLAC AEIOU,WYaei,ouwy 1 DEAT 2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ 0 @CHILD6@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0301/ACUTE/ 1 BIRT 2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz 1 DEAT 2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź 0 @CHILD7@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0302/CIRCUMFLEX/ 1 BIRT 2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z 1 DEAT 2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ 0 @CHILD8@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0303/TILDE/ 1 BIRT 2 PLAC AEINO,UVYae,inouv,y 1 DEAT 2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ 0 @CHILD9@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0304/MACRON/ 1 BIRT 2 PLAC AEGIO,Uaegi,ou 1 DEAT 2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū 0 @CHILD10@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0306/BREVE/ 1 BIRT 2 PLAC AEGIO,Uaegi,ou 1 DEAT 2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ 0 @CHILD11@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0307/DOT ABOVE/ 1 BIRT 2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz 1 DEAT 2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż 0 @CHILD12@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0308/DIAERESIS/ 1 BIRT 2 PLAC AEHIO,UWXYa,ehiot,uwxy 1 DEAT 2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ 0 @CHILD13@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030C/CARON/ 1 BIRT 2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z 1 DEAT 2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž 0 @CHILD14@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030A/RING ABOVE/ 1 BIRT 2 PLAC AUauw,y 1 DEAT 2 PLAC ÅŮåůẘ,ẙ 0 @CHILD15@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE20/LIGATURE LEFT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD16@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE21/LIGATURE RIGHT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD17@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0315/COMMA ABOVE RIGHT/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD18@ INDI 1 FAMC @FAMILY@ 1 NAME code: 030B/DOUBLE ACUTE/ 1 BIRT 2 PLAC OUou 1 DEAT 2 PLAC ŐŰőű 0 @CHILD19@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0310/CANDRABINDU/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD20@ 
INDI 1 FAMC @FAMILY@ 1 NAME code: 0327/CEDILLA/ 1 BIRT 2 PLAC CDGHK,LNRST,cdghk,lnrst 1 DEAT 2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ 0 @CHILD21@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0328/OGONEK/ 1 BIRT 2 PLAC AEIOU,aeiou 1 DEAT 2 PLAC ĄĘĮǪŲ,ąęįǫų 0 @CHILD22@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0323/DOT BELOW/ 1 BIRT 2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz 1 DEAT 2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ 0 @CHILD23@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0324/DIAERESIS BELOW/ 1 BIRT 2 PLAC Uu 1 DEAT 2 PLAC Ṳṳ 0 @CHILD24@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0325/RING BELOW/ 1 BIRT 2 PLAC Aa 1 DEAT 2 PLAC Ḁḁ 0 @CHILD25@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0333/DOUBLE LOW LINE/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD26@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0332/LINE BELOW/ 1 BIRT 2 PLAC BDKLN,RTZbd,hklnr,tz 1 DEAT 2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ 0 @CHILD27@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0326/COMMA BELOW/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD28@ INDI 1 FAMC @FAMILY@ 1 NAME code: 031C/LEFT HALF RING BELOW/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD29@ INDI 1 FAMC @FAMILY@ 1 NAME code: 032E/BREVE BELOW/ 1 BIRT 2 PLAC Hh 1 DEAT 2 PLAC Ḫḫ 0 @CHILD30@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD31@ INDI 1 FAMC @FAMILY@ 1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @CHILD32@ INDI 1 FAMC @FAMILY@ 1 NAME code: 0313/COMMA ABOVE/ 1 BIRT 2 PLAC - none - 1 DEAT 2 PLAC - none - 0 @FAMILY@ FAM 1 HUSB @FATHER@ 1 WIFE @MOTHER@ 1 CHIL @CHILD0@ 1 CHIL @CHILD1@ 1 CHIL @CHILD2@ 1 CHIL @CHILD3@ 1 CHIL @CHILD4@ 1 CHIL @CHILD5@ 1 CHIL @CHILD6@ 1 CHIL @CHILD7@ 1 CHIL @CHILD8@ 1 CHIL @CHILD9@ 1 CHIL @CHILD10@ 1 CHIL @CHILD11@ 1 CHIL @CHILD12@ 1 CHIL @CHILD13@ 1 CHIL @CHILD14@ 1 CHIL @CHILD15@ 1 CHIL @CHILD16@ 1 CHIL @CHILD17@ 1 CHIL @CHILD18@ 1 CHIL @CHILD19@ 1 CHIL @CHILD20@ 1 CHIL @CHILD21@ 1 CHIL @CHILD22@ 1 CHIL @CHILD23@ 1 CHIL 
@CHILD24@ 1 CHIL @CHILD25@ 1 CHIL @CHILD26@ 1 CHIL @CHILD27@ 1 CHIL @CHILD28@ 1 CHIL @CHILD29@ 1 CHIL @CHILD30@ 1 CHIL @CHILD31@ 1 CHIL @CHILD32@ 0 TRLR \ No newline at end of file diff --git a/data/tests/UTF_8_NOBOM_CR.gramps b/data/tests/UTF_8_NOBOM_CR.gramps new file mode 100644 index 000000000..64540e707 --- /dev/null +++ b/data/tests/UTF_8_NOBOM_CR.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_NOBOM_CRLF.GED b/data/tests/UTF_8_NOBOM_CRLF.GED new file mode 100644 index 000000000..747d59f1e --- /dev/null +++ b/data/tests/UTF_8_NOBOM_CRLF.GED @@ -0,0 +1,328 @@ +0 HEAD +1 CHAR UTF-8 +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE UTF-8 transmission test. +2 CONT The transmission does NOT start with a byte order mark (BOM) +2 CONT Each line is terminated using carriage return + line feed. +2 CONT This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are used +2 CONT to test the cyrillic and greek letters. In both 'persons' the +2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +2 CONT small letters of alphabet. +2 CONT The children contain some combined letters and special charcters. +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. +2 CONT The first children contain some special characters. Here the strings +2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +2 CONT where 'character name'is the name of the character (like 'british pound') +2 CONT and 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain some combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are +2 CONT combined with the non-spacing character tested here and which have +2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters +2 CONT without the non-spacing part. +2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC +2 CONT tag contains all latin letters which have a UNICODE code point if +2 CONT combined with a ring above. 
The DEAT.PLAC tag contain the same +2 CONT charcters combined with this ring. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT This file based on the following source: +2 CONT www.unicode.org delivered the connection from the code point names +2 CONT to the actual values. Note, that much more UNICODE characters are +2 CONT possible (like the chinese alphabet). +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic/ +1 BIRT +2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +1 DEAT +2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek/ +1 BIRT +2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +1 DEAT +2 PLAC αβγδεζηθικλμνξοπρςστυφχψω +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) +1 DEAT +2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) +1 DEAT +2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) +1 DEAT +2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) +1 DEAT +2 PLAC music sharp sign (♯), inverted question mark (¿), 
inverted exclamation mark (¡), small sharp S (ß) +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0309/HOOK ABOVE/ +1 BIRT +2 PLAC AEIOU,Yaeio,uy +1 DEAT +2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0300/GRAVE/ +1 BIRT +2 PLAC AEIOU,WYaei,ouwy +1 DEAT +2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ +0 @CHILD6@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0301/ACUTE/ +1 BIRT +2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz +1 DEAT +2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0302/CIRCUMFLEX/ +1 BIRT +2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z +1 DEAT +2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0303/TILDE/ +1 BIRT +2 PLAC AEINO,UVYae,inouv,y +1 DEAT +2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0304/MACRON/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0306/BREVE/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0307/DOT ABOVE/ +1 BIRT +2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz +1 DEAT +2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0308/DIAERESIS/ +1 BIRT +2 PLAC AEHIO,UWXYa,ehiot,uwxy +1 DEAT +2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030C/CARON/ +1 BIRT +2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z +1 DEAT +2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030A/RING ABOVE/ +1 BIRT +2 PLAC AUauw,y +1 DEAT +2 PLAC ÅŮåůẘ,ẙ +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE20/LIGATURE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE21/LIGATURE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0315/COMMA ABOVE RIGHT/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 
@CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030B/DOUBLE ACUTE/ +1 BIRT +2 PLAC OUou +1 DEAT +2 PLAC ŐŰőű +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0310/CANDRABINDU/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0327/CEDILLA/ +1 BIRT +2 PLAC CDGHK,LNRST,cdghk,lnrst +1 DEAT +2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0328/OGONEK/ +1 BIRT +2 PLAC AEIOU,aeiou +1 DEAT +2 PLAC ĄĘĮǪŲ,ąęįǫų +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0323/DOT BELOW/ +1 BIRT +2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz +1 DEAT +2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0324/DIAERESIS BELOW/ +1 BIRT +2 PLAC Uu +1 DEAT +2 PLAC Ṳṳ +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0325/RING BELOW/ +1 BIRT +2 PLAC Aa +1 DEAT +2 PLAC Ḁḁ +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0333/DOUBLE LOW LINE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0332/LINE BELOW/ +1 BIRT +2 PLAC BDKLN,RTZbd,hklnr,tz +1 DEAT +2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0326/COMMA BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 NAME code: 031C/LEFT HALF RING BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: 032E/BREVE BELOW/ +1 BIRT +2 PLAC Hh +1 DEAT +2 PLAC Ḫḫ +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0313/COMMA ABOVE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL 
@CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/UTF_8_NOBOM_CRLF.gramps b/data/tests/UTF_8_NOBOM_CRLF.gramps new file mode 100644 index 000000000..c7e30b86e --- /dev/null +++ b/data/tests/UTF_8_NOBOM_CRLF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/UTF_8_NOBOM_LF.GED b/data/tests/UTF_8_NOBOM_LF.GED new file mode 100644 index 000000000..d95032c4d --- /dev/null +++ b/data/tests/UTF_8_NOBOM_LF.GED @@ -0,0 +1,328 @@ +0 HEAD +1 CHAR UTF-8 +1 SOUR REGISTERED_SOURCE_NAME +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 NOTE UTF-8 transmission test. +2 CONT The transmission does NOT start with a byte order mark (BOM) +2 CONT Each line is terminated using line feed. +2 CONT This GEDCOM transmission contains a charcter set test. It consists +2 CONT of a single family (two parents, many children). The parents are used +2 CONT to test the cyrillic and greek letters. In both 'persons' the +2 CONT BIRT.PLAC tag contains some capital and the DEAT.PLAC tag some +2 CONT small letters of alphabet. +2 CONT The children contain some combined letters and special charcters. +2 CONT The NAME tag of each 'person' is the name of the characters tested +2 CONT within the person. +2 CONT The first children contain some special characters. Here the strings +2 CONT given in BIRT.PLAC and DEAT.PLAC are 'character name (test character), ...' +2 CONT where 'character name'is the name of the character (like 'british pound') +2 CONT and 'test character' is a single byte representing this character +2 CONT in ANSEL. +2 CONT The last children contain some combined characters. The name tag gives +2 CONT the name of the non-spacing character tested within the 'person'. +2 CONT Within the name the hex-values of the non-spacing character is given +2 CONT UNICODE. The DEAT.PLAC tag contains all latin characters which are +2 CONT combined with the non-spacing character tested here and which have +2 CONT a UNICODE code point. The BIRT.PLAC tag contain the same letters +2 CONT without the non-spacing part. +2 CONT Example: One 'person' is named 'ring above'. The BIRT.PLAC +2 CONT tag contains all latin letters which have a UNICODE code point if +2 CONT combined with a ring above. 
The DEAT.PLAC tag contain the same +2 CONT charcters combined with this ring. +2 CONT Note: Not all charcters can be displayed on all computers. +2 CONT This strongly depends on the installed fonts and codepages. +2 CONT This file based on the following source: +2 CONT www.unicode.org delivered the connection from the code point names +2 CONT to the actual values. Note, that much more UNICODE characters are +2 CONT possible (like the chinese alphabet). +1 SUBM @SUBMITTER@ +1 DATE 20 JAN 1998 +0 @SUBMITTER@ SUBM +1 NAME /H. Eichmann/ +1 ADDR email: h.eichmann@@gmx.de +0 @FATHER@ INDI +1 NAME /cyrillic/ +1 BIRT +2 PLAC АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ +1 DEAT +2 PLAC абвгдежзийклмнопрстуфхцчшщъыьэюя +1 SEX M +1 FAMS @FAMILY@ +0 @MOTHER@ INDI +1 NAME /greek/ +1 BIRT +2 PLAC ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ +1 DEAT +2 PLAC αβγδεζηθικλμνξοπρςστυφχψω +1 SEX F +1 FAMS @FAMILY@ +0 @CHILD0@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 0/ +1 BIRT +2 PLAC capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) +1 DEAT +2 PLAC capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) +0 @CHILD1@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 1/ +1 BIRT +2 PLAC registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) +1 DEAT +2 PLAC modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) +0 @CHILD2@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 2/ +1 BIRT +2 PLAC small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) +1 DEAT +2 PLAC small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) +0 @CHILD3@ INDI +1 FAMC @FAMILY@ +1 NAME /Special Characters 3/ +1 BIRT +2 PLAC degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) +1 DEAT +2 PLAC music sharp sign (♯), inverted question mark (¿), 
inverted exclamation mark (¡), small sharp S (ß) +0 @CHILD4@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0309/HOOK ABOVE/ +1 BIRT +2 PLAC AEIOU,Yaeio,uy +1 DEAT +2 PLAC ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ +0 @CHILD5@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0300/GRAVE/ +1 BIRT +2 PLAC AEIOU,WYaei,ouwy +1 DEAT +2 PLAC ÀÈÌÒÙ,ẀỲàèì,òùẁỳ +0 @CHILD6@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0301/ACUTE/ +1 BIRT +2 PLAC ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz +1 DEAT +2 PLAC ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź +0 @CHILD7@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0302/CIRCUMFLEX/ +1 BIRT +2 PLAC ACEGH,IJOSU,WYZac,eghij,osuwy,z +1 DEAT +2 PLAC ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ +0 @CHILD8@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0303/TILDE/ +1 BIRT +2 PLAC AEINO,UVYae,inouv,y +1 DEAT +2 PLAC ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ +0 @CHILD9@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0304/MACRON/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĀĒḠĪŌ,Ūāēḡī,ōū +0 @CHILD10@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0306/BREVE/ +1 BIRT +2 PLAC AEGIO,Uaegi,ou +1 DEAT +2 PLAC ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ +0 @CHILD11@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0307/DOT ABOVE/ +1 BIRT +2 PLAC BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz +1 DEAT +2 PLAC ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż +0 @CHILD12@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0308/DIAERESIS/ +1 BIRT +2 PLAC AEHIO,UWXYa,ehiot,uwxy +1 DEAT +2 PLAC ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ +0 @CHILD13@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030C/CARON/ +1 BIRT +2 PLAC ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z +1 DEAT +2 PLAC ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž +0 @CHILD14@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030A/RING ABOVE/ +1 BIRT +2 PLAC AUauw,y +1 DEAT +2 PLAC ÅŮåůẘ,ẙ +0 @CHILD15@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE20/LIGATURE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD16@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE21/LIGATURE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD17@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0315/COMMA ABOVE RIGHT/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 
@CHILD18@ INDI +1 FAMC @FAMILY@ +1 NAME code: 030B/DOUBLE ACUTE/ +1 BIRT +2 PLAC OUou +1 DEAT +2 PLAC ŐŰőű +0 @CHILD19@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0310/CANDRABINDU/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD20@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0327/CEDILLA/ +1 BIRT +2 PLAC CDGHK,LNRST,cdghk,lnrst +1 DEAT +2 PLAC ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ +0 @CHILD21@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0328/OGONEK/ +1 BIRT +2 PLAC AEIOU,aeiou +1 DEAT +2 PLAC ĄĘĮǪŲ,ąęįǫų +0 @CHILD22@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0323/DOT BELOW/ +1 BIRT +2 PLAC ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz +1 DEAT +2 PLAC ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ +0 @CHILD23@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0324/DIAERESIS BELOW/ +1 BIRT +2 PLAC Uu +1 DEAT +2 PLAC Ṳṳ +0 @CHILD24@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0325/RING BELOW/ +1 BIRT +2 PLAC Aa +1 DEAT +2 PLAC Ḁḁ +0 @CHILD25@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0333/DOUBLE LOW LINE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD26@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0332/LINE BELOW/ +1 BIRT +2 PLAC BDKLN,RTZbd,hklnr,tz +1 DEAT +2 PLAC ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ +0 @CHILD27@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0326/COMMA BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD28@ INDI +1 FAMC @FAMILY@ +1 NAME code: 031C/LEFT HALF RING BELOW/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD29@ INDI +1 FAMC @FAMILY@ +1 NAME code: 032E/BREVE BELOW/ +1 BIRT +2 PLAC Hh +1 DEAT +2 PLAC Ḫḫ +0 @CHILD30@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE22/DOUBLE TILDE LEFT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD31@ INDI +1 FAMC @FAMILY@ +1 NAME code: FE23/DOUBLE TILDE RIGHT HALF/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @CHILD32@ INDI +1 FAMC @FAMILY@ +1 NAME code: 0313/COMMA ABOVE/ +1 BIRT +2 PLAC - none - +1 DEAT +2 PLAC - none - +0 @FAMILY@ FAM +1 HUSB @FATHER@ +1 WIFE @MOTHER@ +1 CHIL @CHILD0@ +1 CHIL @CHILD1@ +1 CHIL @CHILD2@ +1 CHIL @CHILD3@ +1 CHIL 
@CHILD4@ +1 CHIL @CHILD5@ +1 CHIL @CHILD6@ +1 CHIL @CHILD7@ +1 CHIL @CHILD8@ +1 CHIL @CHILD9@ +1 CHIL @CHILD10@ +1 CHIL @CHILD11@ +1 CHIL @CHILD12@ +1 CHIL @CHILD13@ +1 CHIL @CHILD14@ +1 CHIL @CHILD15@ +1 CHIL @CHILD16@ +1 CHIL @CHILD17@ +1 CHIL @CHILD18@ +1 CHIL @CHILD19@ +1 CHIL @CHILD20@ +1 CHIL @CHILD21@ +1 CHIL @CHILD22@ +1 CHIL @CHILD23@ +1 CHIL @CHILD24@ +1 CHIL @CHILD25@ +1 CHIL @CHILD26@ +1 CHIL @CHILD27@ +1 CHIL @CHILD28@ +1 CHIL @CHILD29@ +1 CHIL @CHILD30@ +1 CHIL @CHILD31@ +1 CHIL @CHILD32@ +0 TRLR diff --git a/data/tests/UTF_8_NOBOM_LF.gramps b/data/tests/UTF_8_NOBOM_LF.gramps new file mode 100644 index 000000000..e2d9da800 --- /dev/null +++ b/data/tests/UTF_8_NOBOM_LF.gramps @@ -0,0 +1,882 @@ + + + +
+ + + /H. Eichmann/ + email: h.eichmann@@gmx.de + +
+ + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + Birth + + + + Death + + + + + + M + + cyrillic + + + + + + + F + + greek + + + + + + + U + + Special Characters 0 + + + + + + + U + + Special Characters 1 + + + + + + + U + + Special Characters 2 + + + + + + + U + + Special Characters 3 + + + + + + + U + + code: 0309 + HOOK ABOVE + + + + + + + U + + code: 0300 + GRAVE + + + + + + + U + + code: 0301 + ACUTE + + + + + + + U + + code: 0302 + CIRCUMFLEX + + + + + + + U + + code: 0303 + TILDE + + + + + + + U + + code: 0304 + MACRON + + + + + + + U + + code: 0306 + BREVE + + + + + + + U + + code: 0307 + DOT ABOVE + + + + + + + U + + code: 0308 + DIAERESIS + + + + + + + U + + code: 030C + CARON + + + + + + + U + + code: 030A + RING ABOVE + + + + + + + U + + code: FE20 + LIGATURE LEFT HALF + + + + + + + U + + code: FE21 + LIGATURE RIGHT HALF + + + + + + + U + + code: 0315 + COMMA ABOVE RIGHT + + + + + + + U + + code: 030B + DOUBLE ACUTE + + + + + + + U + + code: 0310 + CANDRABINDU + + + + + + + U + + code: 0327 + CEDILLA + + + + + + + U + + code: 0328 + OGONEK + + + + + + + U + + 
code: 0323 + DOT BELOW + + + + + + + U + + code: 0324 + DIAERESIS BELOW + + + + + + + U + + code: 0325 + RING BELOW + + + + + + + U + + code: 0333 + DOUBLE LOW LINE + + + + + + + U + + code: 0332 + LINE BELOW + + + + + + + U + + code: 0326 + COMMA BELOW + + + + + + + U + + code: 031C + LEFT HALF RING BELOW + + + + + + + U + + code: 032E + BREVE BELOW + + + + + + + U + + code: FE22 + DOUBLE TILDE LEFT HALF + + + + + + + U + + code: FE23 + DOUBLE TILDE RIGHT HALF + + + + + + + U + + code: 0313 + COMMA ABOVE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ + + + + абвгдежзийклмнопрстуфхцчшщъыьэюя + + + + ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ + + + + αβγδεζηθικλμνξοπρςστυφχψω + + + + capital L with stroke (Ł), capital O with stroke (Ø), capital D with stroke (Đ), capital thorn (Þ) + + + + capital AE (Æ), capital ligature OE (Œ), modified prime (ʹ), middle dot (·), music flat sign (♭) + + + + registered sign (®), plus-minus sign (±), capital O with horn (Ơ), capital U with horn (Ư) + + + + modifier right half ring (ʾ), modifier left half ring (ʿ), small L with stroke (ł), small O with stroke (ø), small D with stroke (đ) + + + + small thorn (þ), small AE (æ), small ligature OE (œ), modified double prime (ʺ) + + + + small dotless i (ı), pound sign (£), small eth (ð), small O with horn (ơ), small U with horn (ư) + + + + degree sign (°), script small L (ℓ), sound recording copyright (℗), copyright sign (©) + + + + music sharp sign (♯), inverted question mark (¿), inverted exclamation mark (¡), small sharp S (ß) + + + + AEIOU,Yaeio,uy + + + + ẢẺỈỎỦ,Ỷảẻỉỏ,ủỷ + + + + AEIOU,WYaei,ouwy + + + + ÀÈÌÒÙ,ẀỲàèì,òùẁỳ + + + + ACEGI,KLMNO,PRSUW,YZace,giklm,noprs,uwyz + + + + ÁĆÉǴÍ,ḰĹḾŃÓ,ṔŔŚÚẂ,ÝŹáćé,ǵíḱĺḿ,ńóṕŕś,úẃýź + + + + ACEGH,IJOSU,WYZac,eghij,osuwy,z + + + + ÂĈÊĜĤ,ÎĴÔŜÛ,ŴŶẐâĉ,êĝĥîĵ,ôŝûŵŷ,ẑ + + + + AEINO,UVYae,inouv,y + + + + ÃẼĨÑÕ,ŨṼỸãẽ,ĩñõũṽ,ỹ + + + + AEGIO,Uaegi,ou + + + + ĀĒḠĪŌ,Ūāēḡī,ōū + + + + ĂĔĞĬŎ,Ŭăĕğĭ,ŏŭ + + 
+ + BCDEF,GHIMN,PRSTW,XYZbc,defgh,mnprs,twxyz + + + + ḂĊḊĖḞ,ĠḢİṀṄ,ṖṘṠṪẆ,ẊẎŻḃċ,ḋėḟġḣ,ṁṅṗṙṡ,ṫẇẋẏż + + + + AEHIO,UWXYa,ehiot,uwxy + + + + ÄËḦÏÖ,ÜẄẌŸä,ëḧïöẗ,üẅẍÿ + + + + ACDEG,IKLNO,RSTUZ,acdeg,ijkln,orstu,z + + + + ǍČĎĚǦ,ǏǨĽŇǑ,ŘŠŤǓŽ,ǎčďěǧ,ǐǰǩľň,ǒřšťǔ,ž + + + + AUauw,y + + + + ÅŮåůẘ,ẙ + + + + - none - + + + + OUou + + + + ŐŰőű + + + + CDGHK,LNRST,cdghk,lnrst + + + + ÇḐĢḨĶ,ĻŅŖŞŢ,çḑģḩķ,ļņŗşţ + + + + AEIOU,aeiou + + + + ĄĘĮǪŲ,ąęįǫų + + + + ABDEH,IKLMN,ORSTU,VWYZa,bdehi,klmno,rstuv,wyz + + + + ẠḄḌẸḤ,ỊḲḶṂṆ,ỌṚṢṬỤ,ṾẈỴẒạ,ḅḍẹḥị,ḳḷṃṇọ,ṛṣṭụṿ,ẉỵẓ + + + + Uu + + + + Ṳṳ + + + + Aa + + + + Ḁḁ + + + + BDKLN,RTZbd,hklnr,tz + + + + ḆḎḴḺṈ,ṞṮẔḇḏ,ẖḵḻṉṟ,ṯẕ + + + + Hh + + + + Ḫḫ + + + +
diff --git a/data/tests/cp1252_CR.ged b/data/tests/cp1252_CR.ged new file mode 100644 index 000000000..f7d054e9f --- /dev/null +++ b/data/tests/cp1252_CR.ged @@ -0,0 +1 @@ +0 HEAD 1 SOUR LIFELINES 3.0.62 1 DEST ANY 1 DATE 3 DEC 2010 2 TIME 7:03 1 GEDC 2 VERS 5.5 2 FORM LINEAGE-LINKED 1 LANG French 1 CHAR cp1252 0 @I1@ INDI 1 NAME Jean /Thomas/ 1 SEX F 1 BIRT 2 DATE 1830 1 DEAT 2 DATE 1904 1 NOTE 2 CONT Table de caractres Windows cp1252 2 CONT 32 [ ] 2 CONT 33 [!] 2 CONT 34 ["] 2 CONT 35 [#] 2 CONT 36 [$] 2 CONT 37 [%] 2 CONT 38 [&] 2 CONT 39 ['] 2 CONT 40 [(] 2 CONT 41 [)] 2 CONT 42 [*] 2 CONT 43 [+] 2 CONT 44 [,] 2 CONT 45 [-] 2 CONT 46 [.] 2 CONT 47 [/] 2 CONT 48 [0] 2 CONT 49 [1] 2 CONT 50 [2] 2 CONT 51 [3] 2 CONT 52 [4] 2 CONT 53 [5] 2 CONT 54 [6] 2 CONT 55 [7] 2 CONT 56 [8] 2 CONT 57 [9] 2 CONT 58 [:] 2 CONT 59 [;] 2 CONT 60 [<] 2 CONT 61 [=] 2 CONT 62 [>] 2 CONT 63 [?] 2 CONT 64 [@] 2 CONT 65 [A] 2 CONT 66 [B] 2 CONT 67 [C] 2 CONT 68 [D] 2 CONT 69 [E] 2 CONT 70 [F] 2 CONT 71 [G] 2 CONT 72 [H] 2 CONT 73 [I] 2 CONT 74 [J] 2 CONT 75 [K] 2 CONT 76 [L] 2 CONT 77 [M] 2 CONT 78 [N] 2 CONT 79 [O] 2 CONT 80 [P] 2 CONT 81 [Q] 2 CONT 82 [R] 2 CONT 83 [S] 2 CONT 84 [T] 2 CONT 85 [U] 2 CONT 86 [V] 2 CONT 87 [W] 2 CONT 88 [X] 2 CONT 89 [Y] 2 CONT 90 [Z] 2 CONT 91 [[] 2 CONT 92 [\] 2 CONT 93 []] 2 CONT 94 [^] 2 CONT 95 [_] 2 CONT 96 [`] 2 CONT 97 [a] 2 CONT 98 [b] 2 CONT 99 [c] 2 CONT 100 [d] 2 CONT 101 [e] 2 CONT 102 [f] 2 CONT 103 [g] 2 CONT 104 [h] 2 CONT 105 [i] 2 CONT 106 [j] 2 CONT 107 [k] 2 CONT 108 [l] 2 CONT 109 [m] 2 CONT 110 [n] 2 CONT 111 [o] 2 CONT 112 [p] 2 CONT 113 [q] 2 CONT 114 [r] 2 CONT 115 [s] 2 CONT 116 [t] 2 CONT 117 [u] 2 CONT 118 [v] 2 CONT 119 [w] 2 CONT 120 [x] 2 CONT 121 [y] 2 CONT 122 [z] 2 CONT 123 [{] 2 CONT 124 [|] 2 CONT 125 [}] 2 CONT 126 [~] 2 CONT 128 [] 2 CONT 130 [] 2 CONT 131 [] 2 CONT 132 [] 2 CONT 133 [] 2 CONT 134 [] 2 CONT 135 [] 2 CONT 136 [] 2 CONT 137 [] 2 CONT 138 [] 2 CONT 139 [] 2 CONT 140 [] 2 CONT 145 [] 2 CONT 146 [] 2 CONT 
147 [] 2 CONT 148 [] 2 CONT 149 [] 2 CONT 150 [] 2 CONT 151 [] 2 CONT 152 [] 2 CONT 153 [] 2 CONT 154 [] 2 CONT 155 [] 2 CONT 156 [] 2 CONT 159 [] 2 CONT 160 [] 2 CONT 161 [] 2 CONT 162 [] 2 CONT 163 [] 2 CONT 164 [] 2 CONT 165 [] 2 CONT 166 [] 2 CONT 167 [] 2 CONT 168 [] 2 CONT 169 [] 2 CONT 170 [] 2 CONT 171 [] 2 CONT 172 [] 2 CONT 173 [] 2 CONT 174 [] 2 CONT 175 [] 2 CONT 176 [] 2 CONT 177 [] 2 CONT 178 [] 2 CONT 179 [] 2 CONT 180 [] 2 CONT 181 [] 2 CONT 182 [] 2 CONT 183 [] 2 CONT 184 [] 2 CONT 185 [] 2 CONT 186 [] 2 CONT 187 [] 2 CONT 188 [] 2 CONT 189 [] 2 CONT 190 [] 2 CONT 191 [] 2 CONT 192 [] 2 CONT 193 [] 2 CONT 194 [] 2 CONT 195 [] 2 CONT 196 [] 2 CONT 197 [] 2 CONT 198 [] 2 CONT 199 [] 2 CONT 200 [] 2 CONT 201 [] 2 CONT 202 [] 2 CONT 203 [] 2 CONT 204 [] 2 CONT 205 [] 2 CONT 206 [] 2 CONT 207 [] 2 CONT 208 [] 2 CONT 209 [] 2 CONT 210 [] 2 CONT 211 [] 2 CONT 212 [] 2 CONT 213 [] 2 CONT 214 [] 2 CONT 215 [] 2 CONT 216 [] 2 CONT 217 [] 2 CONT 218 [] 2 CONT 219 [] 2 CONT 220 [] 2 CONT 221 [] 2 CONT 222 [] 2 CONT 223 [] 2 CONT 224 [] 2 CONT 225 [] 2 CONT 226 [] 2 CONT 227 [] 2 CONT 228 [] 2 CONT 229 [] 2 CONT 230 [] 2 CONT 231 [] 2 CONT 232 [] 2 CONT 233 [] 2 CONT 234 [] 2 CONT 235 [] 2 CONT 236 [] 2 CONT 237 [] 2 CONT 238 [] 2 CONT 239 [] 2 CONT 240 [] 2 CONT 241 [] 2 CONT 242 [] 2 CONT 243 [] 2 CONT 244 [] 2 CONT 245 [] 2 CONT 246 [] 2 CONT 247 [] 2 CONT 248 [] 2 CONT 249 [] 2 CONT 250 [] 2 CONT 251 [] 2 CONT 252 [] 2 CONT 253 [] 2 CONT 254 [] 2 CONT 255 [] 0 TRLR \ No newline at end of file diff --git a/data/tests/cp1252_CR.gramps b/data/tests/cp1252_CR.gramps new file mode 100644 index 000000000..1bbeee04c --- /dev/null +++ b/data/tests/cp1252_CR.gramps @@ -0,0 +1,260 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + F + + Jean + Thomas + + + + + + + + + Table de caractères Windows cp1252 +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +128 [€] +130 [‚] +131 [ƒ] +132 [„] +133 […] +134 [†] +135 [‡] +136 [ˆ] +137 [‰] +138 [Š] +139 [‹] +140 [Œ] +145 [‘] +146 [’] +147 [“] +148 [”] +149 [•] +150 [–] +151 [—] +152 [˜] +153 [™] +154 [š] +155 [›] +156 [œ] +159 [Ÿ] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/data/tests/cp1252_CRLF.ged b/data/tests/cp1252_CRLF.ged new file mode 100644 index 000000000..8c21f45d4 --- /dev/null +++ b/data/tests/cp1252_CRLF.ged @@ -0,0 +1,236 @@ +0 HEAD +1 SOUR LIFELINES 3.0.62 +1 DEST ANY +1 DATE 3 DEC 2010 +2 TIME 7:03 +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 LANG French +1 CHAR cp1252 +0 @I1@ INDI +1 NAME Jean /Thomas/ +1 SEX F +1 BIRT +2 DATE 1830 +1 DEAT +2 DATE 1904 +1 NOTE +2 CONT Table de caractres Windows cp1252 +2 CONT 32 [ ] +2 CONT 33 [!] +2 CONT 34 ["] +2 CONT 35 [#] +2 CONT 36 [$] +2 CONT 37 [%] +2 CONT 38 [&] +2 CONT 39 ['] +2 CONT 40 [(] +2 CONT 41 [)] +2 CONT 42 [*] +2 CONT 43 [+] +2 CONT 44 [,] +2 CONT 45 [-] +2 CONT 46 [.] +2 CONT 47 [/] +2 CONT 48 [0] +2 CONT 49 [1] +2 CONT 50 [2] +2 CONT 51 [3] +2 CONT 52 [4] +2 CONT 53 [5] +2 CONT 54 [6] +2 CONT 55 [7] +2 CONT 56 [8] +2 CONT 57 [9] +2 CONT 58 [:] +2 CONT 59 [;] +2 CONT 60 [<] +2 CONT 61 [=] +2 CONT 62 [>] +2 CONT 63 [?] +2 CONT 64 [@] +2 CONT 65 [A] +2 CONT 66 [B] +2 CONT 67 [C] +2 CONT 68 [D] +2 CONT 69 [E] +2 CONT 70 [F] +2 CONT 71 [G] +2 CONT 72 [H] +2 CONT 73 [I] +2 CONT 74 [J] +2 CONT 75 [K] +2 CONT 76 [L] +2 CONT 77 [M] +2 CONT 78 [N] +2 CONT 79 [O] +2 CONT 80 [P] +2 CONT 81 [Q] +2 CONT 82 [R] +2 CONT 83 [S] +2 CONT 84 [T] +2 CONT 85 [U] +2 CONT 86 [V] +2 CONT 87 [W] +2 CONT 88 [X] +2 CONT 89 [Y] +2 CONT 90 [Z] +2 CONT 91 [[] +2 CONT 92 [\] +2 CONT 93 []] +2 CONT 94 [^] +2 CONT 95 [_] +2 CONT 96 [`] +2 CONT 97 [a] +2 CONT 98 [b] +2 CONT 99 [c] +2 CONT 100 [d] +2 CONT 101 [e] +2 CONT 102 [f] +2 CONT 103 [g] +2 CONT 104 [h] +2 CONT 105 [i] +2 CONT 106 [j] +2 CONT 107 [k] +2 CONT 108 [l] +2 CONT 109 [m] +2 CONT 110 [n] +2 CONT 111 [o] +2 CONT 112 [p] +2 CONT 113 [q] +2 CONT 114 [r] +2 CONT 115 [s] +2 CONT 116 [t] +2 CONT 117 [u] +2 CONT 118 [v] +2 CONT 119 [w] +2 CONT 120 [x] +2 CONT 121 [y] +2 CONT 122 [z] +2 CONT 123 [{] +2 CONT 124 [|] +2 CONT 125 [}] +2 CONT 126 [~] +2 CONT 128 [] +2 CONT 130 [] +2 CONT 131 [] +2 CONT 132 [] +2 CONT 133 [] +2 
CONT 134 [] +2 CONT 135 [] +2 CONT 136 [] +2 CONT 137 [] +2 CONT 138 [] +2 CONT 139 [] +2 CONT 140 [] +2 CONT 145 [] +2 CONT 146 [] +2 CONT 147 [] +2 CONT 148 [] +2 CONT 149 [] +2 CONT 150 [] +2 CONT 151 [] +2 CONT 152 [] +2 CONT 153 [] +2 CONT 154 [] +2 CONT 155 [] +2 CONT 156 [] +2 CONT 159 [] +2 CONT 160 [] +2 CONT 161 [] +2 CONT 162 [] +2 CONT 163 [] +2 CONT 164 [] +2 CONT 165 [] +2 CONT 166 [] +2 CONT 167 [] +2 CONT 168 [] +2 CONT 169 [] +2 CONT 170 [] +2 CONT 171 [] +2 CONT 172 [] +2 CONT 173 [] +2 CONT 174 [] +2 CONT 175 [] +2 CONT 176 [] +2 CONT 177 [] +2 CONT 178 [] +2 CONT 179 [] +2 CONT 180 [] +2 CONT 181 [] +2 CONT 182 [] +2 CONT 183 [] +2 CONT 184 [] +2 CONT 185 [] +2 CONT 186 [] +2 CONT 187 [] +2 CONT 188 [] +2 CONT 189 [] +2 CONT 190 [] +2 CONT 191 [] +2 CONT 192 [] +2 CONT 193 [] +2 CONT 194 [] +2 CONT 195 [] +2 CONT 196 [] +2 CONT 197 [] +2 CONT 198 [] +2 CONT 199 [] +2 CONT 200 [] +2 CONT 201 [] +2 CONT 202 [] +2 CONT 203 [] +2 CONT 204 [] +2 CONT 205 [] +2 CONT 206 [] +2 CONT 207 [] +2 CONT 208 [] +2 CONT 209 [] +2 CONT 210 [] +2 CONT 211 [] +2 CONT 212 [] +2 CONT 213 [] +2 CONT 214 [] +2 CONT 215 [] +2 CONT 216 [] +2 CONT 217 [] +2 CONT 218 [] +2 CONT 219 [] +2 CONT 220 [] +2 CONT 221 [] +2 CONT 222 [] +2 CONT 223 [] +2 CONT 224 [] +2 CONT 225 [] +2 CONT 226 [] +2 CONT 227 [] +2 CONT 228 [] +2 CONT 229 [] +2 CONT 230 [] +2 CONT 231 [] +2 CONT 232 [] +2 CONT 233 [] +2 CONT 234 [] +2 CONT 235 [] +2 CONT 236 [] +2 CONT 237 [] +2 CONT 238 [] +2 CONT 239 [] +2 CONT 240 [] +2 CONT 241 [] +2 CONT 242 [] +2 CONT 243 [] +2 CONT 244 [] +2 CONT 245 [] +2 CONT 246 [] +2 CONT 247 [] +2 CONT 248 [] +2 CONT 249 [] +2 CONT 250 [] +2 CONT 251 [] +2 CONT 252 [] +2 CONT 253 [] +2 CONT 254 [] +2 CONT 255 [] +0 TRLR diff --git a/data/tests/cp1252_CRLF.gramps b/data/tests/cp1252_CRLF.gramps new file mode 100644 index 000000000..1bbeee04c --- /dev/null +++ b/data/tests/cp1252_CRLF.gramps @@ -0,0 +1,260 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + F + + Jean + Thomas + + + + + + + + + Table de caractères Windows cp1252 +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +128 [€] +130 [‚] +131 [ƒ] +132 [„] +133 […] +134 [†] +135 [‡] +136 [ˆ] +137 [‰] +138 [Š] +139 [‹] +140 [Œ] +145 [‘] +146 [’] +147 [“] +148 [”] +149 [•] +150 [–] +151 [—] +152 [˜] +153 [™] +154 [š] +155 [›] +156 [œ] +159 [Ÿ] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/data/tests/cp1252_LF.ged b/data/tests/cp1252_LF.ged new file mode 100644 index 000000000..037a286c4 --- /dev/null +++ b/data/tests/cp1252_LF.ged @@ -0,0 +1,236 @@ +0 HEAD +1 SOUR LIFELINES 3.0.62 +1 DEST ANY +1 DATE 3 DEC 2010 +2 TIME 7:03 +1 GEDC +2 VERS 5.5 +2 FORM LINEAGE-LINKED +1 LANG French +1 CHAR cp1252 +0 @I1@ INDI +1 NAME Jean /Thomas/ +1 SEX F +1 BIRT +2 DATE 1830 +1 DEAT +2 DATE 1904 +1 NOTE +2 CONT Table de caractres Windows cp1252 +2 CONT 32 [ ] +2 CONT 33 [!] +2 CONT 34 ["] +2 CONT 35 [#] +2 CONT 36 [$] +2 CONT 37 [%] +2 CONT 38 [&] +2 CONT 39 ['] +2 CONT 40 [(] +2 CONT 41 [)] +2 CONT 42 [*] +2 CONT 43 [+] +2 CONT 44 [,] +2 CONT 45 [-] +2 CONT 46 [.] +2 CONT 47 [/] +2 CONT 48 [0] +2 CONT 49 [1] +2 CONT 50 [2] +2 CONT 51 [3] +2 CONT 52 [4] +2 CONT 53 [5] +2 CONT 54 [6] +2 CONT 55 [7] +2 CONT 56 [8] +2 CONT 57 [9] +2 CONT 58 [:] +2 CONT 59 [;] +2 CONT 60 [<] +2 CONT 61 [=] +2 CONT 62 [>] +2 CONT 63 [?] +2 CONT 64 [@] +2 CONT 65 [A] +2 CONT 66 [B] +2 CONT 67 [C] +2 CONT 68 [D] +2 CONT 69 [E] +2 CONT 70 [F] +2 CONT 71 [G] +2 CONT 72 [H] +2 CONT 73 [I] +2 CONT 74 [J] +2 CONT 75 [K] +2 CONT 76 [L] +2 CONT 77 [M] +2 CONT 78 [N] +2 CONT 79 [O] +2 CONT 80 [P] +2 CONT 81 [Q] +2 CONT 82 [R] +2 CONT 83 [S] +2 CONT 84 [T] +2 CONT 85 [U] +2 CONT 86 [V] +2 CONT 87 [W] +2 CONT 88 [X] +2 CONT 89 [Y] +2 CONT 90 [Z] +2 CONT 91 [[] +2 CONT 92 [\] +2 CONT 93 []] +2 CONT 94 [^] +2 CONT 95 [_] +2 CONT 96 [`] +2 CONT 97 [a] +2 CONT 98 [b] +2 CONT 99 [c] +2 CONT 100 [d] +2 CONT 101 [e] +2 CONT 102 [f] +2 CONT 103 [g] +2 CONT 104 [h] +2 CONT 105 [i] +2 CONT 106 [j] +2 CONT 107 [k] +2 CONT 108 [l] +2 CONT 109 [m] +2 CONT 110 [n] +2 CONT 111 [o] +2 CONT 112 [p] +2 CONT 113 [q] +2 CONT 114 [r] +2 CONT 115 [s] +2 CONT 116 [t] +2 CONT 117 [u] +2 CONT 118 [v] +2 CONT 119 [w] +2 CONT 120 [x] +2 CONT 121 [y] +2 CONT 122 [z] +2 CONT 123 [{] +2 CONT 124 [|] +2 CONT 125 [}] +2 CONT 126 [~] +2 CONT 128 [] +2 CONT 130 [] +2 CONT 131 [] +2 CONT 132 [] +2 CONT 133 [] +2 CONT 
134 [] +2 CONT 135 [] +2 CONT 136 [] +2 CONT 137 [] +2 CONT 138 [] +2 CONT 139 [] +2 CONT 140 [] +2 CONT 145 [] +2 CONT 146 [] +2 CONT 147 [] +2 CONT 148 [] +2 CONT 149 [] +2 CONT 150 [] +2 CONT 151 [] +2 CONT 152 [] +2 CONT 153 [] +2 CONT 154 [] +2 CONT 155 [] +2 CONT 156 [] +2 CONT 159 [] +2 CONT 160 [] +2 CONT 161 [] +2 CONT 162 [] +2 CONT 163 [] +2 CONT 164 [] +2 CONT 165 [] +2 CONT 166 [] +2 CONT 167 [] +2 CONT 168 [] +2 CONT 169 [] +2 CONT 170 [] +2 CONT 171 [] +2 CONT 172 [] +2 CONT 173 [] +2 CONT 174 [] +2 CONT 175 [] +2 CONT 176 [] +2 CONT 177 [] +2 CONT 178 [] +2 CONT 179 [] +2 CONT 180 [] +2 CONT 181 [] +2 CONT 182 [] +2 CONT 183 [] +2 CONT 184 [] +2 CONT 185 [] +2 CONT 186 [] +2 CONT 187 [] +2 CONT 188 [] +2 CONT 189 [] +2 CONT 190 [] +2 CONT 191 [] +2 CONT 192 [] +2 CONT 193 [] +2 CONT 194 [] +2 CONT 195 [] +2 CONT 196 [] +2 CONT 197 [] +2 CONT 198 [] +2 CONT 199 [] +2 CONT 200 [] +2 CONT 201 [] +2 CONT 202 [] +2 CONT 203 [] +2 CONT 204 [] +2 CONT 205 [] +2 CONT 206 [] +2 CONT 207 [] +2 CONT 208 [] +2 CONT 209 [] +2 CONT 210 [] +2 CONT 211 [] +2 CONT 212 [] +2 CONT 213 [] +2 CONT 214 [] +2 CONT 215 [] +2 CONT 216 [] +2 CONT 217 [] +2 CONT 218 [] +2 CONT 219 [] +2 CONT 220 [] +2 CONT 221 [] +2 CONT 222 [] +2 CONT 223 [] +2 CONT 224 [] +2 CONT 225 [] +2 CONT 226 [] +2 CONT 227 [] +2 CONT 228 [] +2 CONT 229 [] +2 CONT 230 [] +2 CONT 231 [] +2 CONT 232 [] +2 CONT 233 [] +2 CONT 234 [] +2 CONT 235 [] +2 CONT 236 [] +2 CONT 237 [] +2 CONT 238 [] +2 CONT 239 [] +2 CONT 240 [] +2 CONT 241 [] +2 CONT 242 [] +2 CONT 243 [] +2 CONT 244 [] +2 CONT 245 [] +2 CONT 246 [] +2 CONT 247 [] +2 CONT 248 [] +2 CONT 249 [] +2 CONT 250 [] +2 CONT 251 [] +2 CONT 252 [] +2 CONT 253 [] +2 CONT 254 [] +2 CONT 255 [] +0 TRLR diff --git a/data/tests/cp1252_LF.gramps b/data/tests/cp1252_LF.gramps new file mode 100644 index 000000000..1bbeee04c --- /dev/null +++ b/data/tests/cp1252_LF.gramps @@ -0,0 +1,260 @@ + + + +
+ + + Paul Culley + 11210 Olde Mint House Ln + Tomball + Tx + USA + 77375 + paulr2787@gmail.com + +
+ + + Birth + + + + Death + + + + + + F + + Jean + Thomas + + + + + + + + + Table de caractères Windows cp1252 +32 [ ] +33 [!] +34 ["] +35 [#] +36 [$] +37 [%] +38 [&] +39 ['] +40 [(] +41 [)] +42 [*] +43 [+] +44 [,] +45 [-] +46 [.] +47 [/] +48 [0] +49 [1] +50 [2] +51 [3] +52 [4] +53 [5] +54 [6] +55 [7] +56 [8] +57 [9] +58 [:] +59 [;] +60 [<] +61 [=] +62 [>] +63 [?] +64 [@] +65 [A] +66 [B] +67 [C] +68 [D] +69 [E] +70 [F] +71 [G] +72 [H] +73 [I] +74 [J] +75 [K] +76 [L] +77 [M] +78 [N] +79 [O] +80 [P] +81 [Q] +82 [R] +83 [S] +84 [T] +85 [U] +86 [V] +87 [W] +88 [X] +89 [Y] +90 [Z] +91 [[] +92 [\] +93 []] +94 [^] +95 [_] +96 [`] +97 [a] +98 [b] +99 [c] +100 [d] +101 [e] +102 [f] +103 [g] +104 [h] +105 [i] +106 [j] +107 [k] +108 [l] +109 [m] +110 [n] +111 [o] +112 [p] +113 [q] +114 [r] +115 [s] +116 [t] +117 [u] +118 [v] +119 [w] +120 [x] +121 [y] +122 [z] +123 [{] +124 [|] +125 [}] +126 [~] +128 [€] +130 [‚] +131 [ƒ] +132 [„] +133 […] +134 [†] +135 [‡] +136 [ˆ] +137 [‰] +138 [Š] +139 [‹] +140 [Œ] +145 [‘] +146 [’] +147 [“] +148 [”] +149 [•] +150 [–] +151 [—] +152 [˜] +153 [™] +154 [š] +155 [›] +156 [œ] +159 [Ÿ] +160 [ ] +161 [¡] +162 [¢] +163 [£] +164 [¤] +165 [¥] +166 [¦] +167 [§] +168 [¨] +169 [©] +170 [ª] +171 [«] +172 [¬] +173 [­] +174 [®] +175 [¯] +176 [°] +177 [±] +178 [²] +179 [³] +180 [´] +181 [µ] +182 [¶] +183 [·] +184 [¸] +185 [¹] +186 [º] +187 [»] +188 [¼] +189 [½] +190 [¾] +191 [¿] +192 [À] +193 [Á] +194 [Â] +195 [Ã] +196 [Ä] +197 [Å] +198 [Æ] +199 [Ç] +200 [È] +201 [É] +202 [Ê] +203 [Ë] +204 [Ì] +205 [Í] +206 [Î] +207 [Ï] +208 [Ð] +209 [Ñ] +210 [Ò] +211 [Ó] +212 [Ô] +213 [Õ] +214 [Ö] +215 [×] +216 [Ø] +217 [Ù] +218 [Ú] +219 [Û] +220 [Ü] +221 [Ý] +222 [Þ] +223 [ß] +224 [à] +225 [á] +226 [â] +227 [ã] +228 [ä] +229 [å] +230 [æ] +231 [ç] +232 [è] +233 [é] +234 [ê] +235 [ë] +236 [ì] +237 [í] +238 [î] +239 [ï] +240 [ð] +241 [ñ] +242 [ò] +243 [ó] +244 [ô] +245 [õ] +246 [ö] +247 [÷] +248 [ø] +249 [ù] +250 [ú] +251 [û] +252 [ü] +253 [ý] +254 [þ] +255 [ÿ] + + +
diff --git a/gramps/plugins/importer/importgedcom.py b/gramps/plugins/importer/importgedcom.py index 482589b23..3f972ee99 100644 --- a/gramps/plugins/importer/importgedcom.py +++ b/gramps/plugins/importer/importgedcom.py @@ -65,26 +65,31 @@ def importData(database, filename, user): if DbMixin not in database.__class__.__bases__: database.__class__.__bases__ = (DbMixin,) + \ database.__class__.__bases__ - try: - with open(filename, "rb") as ifile: + # Opening in utf-8 with universal newline to allow cr, lf, and crlf + # If the file is really UTF16 or a variant, the next block of code will not + # find anything even if it is there, but this is ok since it won't be + # ANSEL, or is inconsistent... + with open(filename, "r", encoding='utf-8', errors='replace', + newline=None) as ifile: ansel = False gramps = False for index in range(50): - # Treat the file as though it is UTF-8 since this is the more modern - # option; and anyway it doesn't really matter as we are only trying to - # detect a CHAR or SOUR line which is only 7-bit ASCII anyway, and we - # ignore anything that can't be translated. + # Treat the file as though it is UTF-8 since this is the more + # modern option; and anyway it doesn't really matter as we are + # only trying to detect a CHAR or SOUR line which is only + # 7-bit ASCII anyway, and we ignore anything that can't be + # translated.
line = ifile.readline() - line = line.decode(encoding='utf-8', errors='replace') line = line.split() if len(line) == 0: break - if len(line) > 2 and line[1][0:4] == 'CHAR' and line[2] == "ANSEL": + if len(line) > 2 and line[1][0:4] == 'CHAR' \ + and line[2] == "ANSEL": ansel = True - if len(line) > 2 and line[1][0:4] == 'SOUR' and line[2] == "GRAMPS": + if len(line) > 2 and line[1][0:4] == 'SOUR' \ + and line[2] == "GRAMPS": gramps = True - except IOError: return diff --git a/gramps/plugins/lib/libgedcom.py b/gramps/plugins/lib/libgedcom.py index b19fef16e..34368d970 100755 --- a/gramps/plugins/lib/libgedcom.py +++ b/gramps/plugins/lib/libgedcom.py @@ -94,7 +94,7 @@ import codecs from xml.parsers.expat import ParserCreate from collections import defaultdict, OrderedDict import string -from io import StringIO +from io import StringIO, TextIOWrapper from urllib.parse import urlparse #------------------------------------------------------------------------ @@ -1248,41 +1248,41 @@ class BaseReader: class UTF8Reader(BaseReader): - def __init__(self, ifile, __add_msg): - BaseReader.__init__(self, ifile, 'utf8', __add_msg) + def __init__(self, ifile, __add_msg, enc): + BaseReader.__init__(self, ifile, enc, __add_msg) self.reset() - - def reset(self): - self.ifile.seek(0) - data = self.ifile.read(3) - if data != b"\xef\xbb\xbf": - self.ifile.seek(0) + if enc == 'UTF_8_SIG': + self.ifile = TextIOWrapper(ifile, encoding='utf_8_sig', + errors='replace', newline=None) + else: + self.ifile = TextIOWrapper(ifile, encoding='utf_8', + errors='replace', newline=None) def readline(self): line = self.ifile.readline() - line = line.decode(self.enc, errors='replace') return line.translate(STRIP_DICT) class UTF16Reader(BaseReader): def __init__(self, ifile, __add_msg): - new_file = codecs.EncodedFile(ifile, 'utf8', 'utf16') - BaseReader.__init__(self, new_file, '', __add_msg) + BaseReader.__init__(self, ifile, 'UTF16', __add_msg) + self.ifile = TextIOWrapper(ifile, encoding='utf_16', + 
errors='replace', newline=None) self.reset() def readline(self): line = self.ifile.readline() - line = line.decode('utf8', errors='replace') return line.translate(STRIP_DICT) class AnsiReader(BaseReader): def __init__(self, ifile, __add_msg): BaseReader.__init__(self, ifile, 'latin1', __add_msg) + self.ifile = TextIOWrapper(ifile, encoding='latin1', + errors='replace', newline=None) def readline(self): line = self.ifile.readline() - line = line.decode(self.enc, errors='replace') if line.translate(DEL_AND_C1) != line: self.report_error("DEL or C1 control chars in line did you mean CHAR cp1252??", line) return line.translate(STRIP_DICT) @@ -1291,10 +1291,11 @@ class CP1252Reader(BaseReader): def __init__(self, ifile, __add_msg): BaseReader.__init__(self, ifile, 'cp1252', __add_msg) + self.ifile = TextIOWrapper(ifile, encoding='cp1252', + errors='replace', newline=None) def readline(self): line = self.ifile.readline() - line = line.decode(self.enc, errors='replace') return line.translate(STRIP_DICT) class AnselReader(BaseReader): @@ -1562,10 +1563,17 @@ class AnselReader(BaseReader): return ans def __init__(self, ifile, __add_msg): - BaseReader.__init__(self, ifile, "", __add_msg) + BaseReader.__init__(self, ifile, "ANSEL", __add_msg) + # In theory, we should have been able to skip the encode/decode from + # ascii. 
But this way allows us to use Python's universal newline + self.ifile = TextIOWrapper(ifile, encoding='ascii', + errors='surrogateescape', newline=None) def readline(self): - return self.__ansel_to_unicode(self.ifile.readline()) + line = self.ifile.readline() + linebytes = line.encode(encoding='ascii', + errors='surrogateescape') + return self.__ansel_to_unicode(linebytes) #------------------------------------------------------------------------- # @@ -2673,8 +2681,8 @@ class GedcomParser(UpdateCallback): if enc == "ANSEL": rdr = AnselReader(ifile, self.__add_msg) - elif enc in ("UTF-8", "UTF8"): - rdr = UTF8Reader(ifile, self.__add_msg) + elif enc in ("UTF-8", "UTF8", "UTF_8_SIG"): + rdr = UTF8Reader(ifile, self.__add_msg, enc) elif enc in ("UTF-16LE", "UTF-16BE", "UTF16", "UNICODE"): rdr = UTF16Reader(ifile, self.__add_msg) elif enc in ("CP1252", "WINDOWS-1252"): @@ -7772,26 +7780,33 @@ class GedcomStageOne: def __detect_file_decoder(self, input_file): """ Detects the file encoding of the file by looking for a BOM - (byte order marker) in the GEDCOM file. If we detect a UTF-16 - encoded file, we must connect to a wrapper using the codecs - package. + (byte order marker) in the GEDCOM file. If we detect a UTF-16 or + UTF-8-BOM encoded file, we choose appropriate decoders. If no BOM + is detected, we return in UTF-8 mode, as it is the more modern option; + and anyway it doesn't really matter as we are only looking for GEDCOM + keywords which are only 7-bit ASCII anyway. + In any case, we always return the file in text mode with transparent + newline (CR, LF, or CRLF).
""" line = input_file.read(2) if line == b"\xef\xbb": input_file.read(1) - self.enc = "UTF8" - return input_file + self.enc = "utf_8_sig" + return TextIOWrapper(input_file, encoding='utf_8_sig', + errors='replace', newline=None) elif line == b"\xff\xfe" or line == b"\xfe\xff": self.enc = "UTF16" input_file.seek(0) - return codecs.EncodedFile(input_file, 'utf8', 'utf16') - elif not line : + return TextIOWrapper(input_file, encoding='utf_16', + errors='replace', newline=None) + elif not line: raise GedcomError(self.__EMPTY_GED) - elif line[0] == b"\x00" or line[1] == b"\x00": + elif line == b"\x30\x00" or line == b"\x00\x30": raise GedcomError(self.__BAD_UTF16) else: input_file.seek(0) - return input_file + return TextIOWrapper(input_file, encoding='utf-8', + errors='replace', newline=None) def parse(self): """ @@ -7802,12 +7817,8 @@ class GedcomStageOne: reader = self.__detect_file_decoder(self.ifile) for line in reader: - # Treat the file as though it is UTF-8 since this will be right if a - # BOM was detected; it is the more modern option; and anyway it - # doesn't really matter as we are only trying to detect a CHAR line - # which is only 7-bit ASCII anyway, and we ignore anything that - # can't be translated. - line = line.decode(encoding='utf-8', errors='replace') + # Scan for a few items, keep counts. Also look for actual CHAR + # Keyword to figure out actual encoding for non-unicode file types line = line.strip() if not line: continue @@ -7838,6 +7849,7 @@ class GedcomStageOne: LOG.debug("parse pcnt %d" % self.pcnt) LOG.debug("parse famc %s" % dict(self.famc)) LOG.debug("parse fams %s" % dict(self.fams)) + self.ifile = reader # need this to keep python from autoclosing file def get_famc_map(self): """