From 473e3f4d0334ebfd0fb2374525232c72bdaf29d4 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sun, 28 Feb 2021 16:54:12 -0800 Subject: [PATCH 01/55] add cool enum & trait for generic add data --- src/tree.rs | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index e8210e2..d99e0c6 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] /// The data structure representing all the data within a gedcom file -pub struct GedcomData { +pub struct Gedcom { /// Header containing file metadata pub header: Header, /// List of submitters of the facts @@ -23,7 +23,19 @@ pub struct GedcomData { } // should maybe store these by xref if available? -impl GedcomData { +impl Gedcom { + pub(crate) fn add(&mut self, data: Box) { + match data.get_type() { + GedcomDataType::Family(family) => self.families.push(family), + GedcomDataType::Header(header) => self.header = header, + GedcomDataType::Individual(person) => self.individuals.push(person), + GedcomDataType::Media(media) => self.multimedia.push(media), + GedcomDataType::Repository(repo) => self.repositories.push(repo), + GedcomDataType::Source(source) => self.sources.push(source), + GedcomDataType::Submitter(submitter) => self.submitters.push(submitter), + } + } + /// Adds a `Family` (a relationship between individuals) to the tree pub fn add_family(&mut self, family: Family) { self.families.push(family); @@ -63,3 +75,18 @@ impl GedcomData { println!("----------------------"); } } + +/// Type of data that can be added to a Gedcom tree. 
+pub(crate) enum GedcomDataType { + Family(Family), + Header(Header), + Individual(Individual), + Media(Media), + Repository(Repository), + Source(Source), + Submitter(Submitter), +} + +pub(crate) trait GedcomData { + fn get_type(&self) -> GedcomDataType; +} From e702719fea52245a8198a44e7f6dc067d6212845 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sun, 28 Feb 2021 17:33:54 -0800 Subject: [PATCH 02/55] start refactor into parsing with Parsable trait --- src/lib.rs | 2 +- src/parser.rs | 88 +++++++++++++++++++------------------------- src/tree.rs | 5 ++- src/types/address.rs | 60 +++++++++++++++++++++++++++++- 4 files changed, 100 insertions(+), 55 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7adf1e8..70da3c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ pub mod tokenizer; pub mod types; mod tree; -pub use tree::GedcomData; +pub use tree::Gedcom as GedcomData; #[must_use] /// Helper function for converting GEDCOM file content stream to parsed data. diff --git a/src/parser.rs b/src/parser.rs index 6473047..337fdfb 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,8 @@ //! 
The state machine that parses a char iterator of the gedcom's contents -use std::{panic, str::Chars}; +use std::{error::Error, fmt, panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; -use crate::tree::GedcomData; +use crate::tree::Gedcom; use crate::types::{ event::HasEvents, Address, CustomData, Event, Family, FamilyLink, Gender, Header, Individual, Name, RepoCitation, Repository, Source, SourceCitation, Submitter, @@ -10,9 +10,11 @@ use crate::types::{ /// The Gedcom parser that converts the token list into a data structure pub struct Parser<'a> { - tokenizer: Tokenizer<'a>, + pub(crate) tokenizer: Tokenizer<'a>, } +// TODO: expose useful helpers without publicizing tokenizer + impl<'a> Parser<'a> { /// Creates a parser state machine for parsing a gedcom file as a chars iterator #[must_use] @@ -23,8 +25,8 @@ impl<'a> Parser<'a> { } /// Does the actual parsing of the record. - pub fn parse_record(&mut self) -> GedcomData { - let mut data = GedcomData::default(); + pub fn parse_record(&mut self) -> Gedcom { + let mut data = Gedcom::default(); loop { let level = match self.tokenizer.current_token { Token::Level(n) => n, @@ -461,51 +463,10 @@ impl<'a> Parser<'a> { /// Parses ADDR tag fn parse_address(&mut self, level: u8) -> Address { - // skip ADDR tag - self.tokenizer.next_token(); - let mut address = Address::default(); - let mut value = String::new(); - - // handle value on ADDR line - if let Token::LineValue(addr) = &self.tokenizer.current_token { - value.push_str(addr); - self.tokenizer.next_token(); - } - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" => { - value.push('\n'); - value.push_str(&self.take_line_value()); - } - "ADR1" => address.adr1 = Some(self.take_line_value()), - "ADR2" => address.adr2 = Some(self.take_line_value()), - "ADR3" => address.adr3 = Some(self.take_line_value()), 
- "CITY" => address.city = Some(self.take_line_value()), - "STAE" => address.state = Some(self.take_line_value()), - "POST" => address.post = Some(self.take_line_value()), - "CTRY" => address.country = Some(self.take_line_value()), - _ => panic!("{} Unhandled Address Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Address Token: {:?}", - self.tokenizer.current_token - ), - } - } - - if &value != "" { - address.value = Some(value); + match Address::parse(self, level) { + Ok(addr) => addr, + Err(e) => panic!("address fail: {:?}", e), } - - address } fn parse_citation(&mut self, level: u8) -> SourceCitation { @@ -569,7 +530,7 @@ impl<'a> Parser<'a> { } /// Grabs and returns to the end of the current line as a String - fn take_line_value(&mut self) -> String { + pub(crate) fn take_line_value(&mut self) -> String { let value: String; self.tokenizer.next_token(); @@ -587,7 +548,32 @@ impl<'a> Parser<'a> { } /// Debug function displaying GEDCOM line number of error message. - fn dbg(&self) -> String { + pub(crate) fn dbg(&self) -> String { format!("line {}:", self.tokenizer.line) } } + +/// Trait given to data types that can be parsed into `GedcomData` +pub trait Parsable { + /// Parses an object by iterating through the `parser` until no longer at given + /// `level` or deeper. + /// + /// # Errors + /// Raises a `ParsingError` when unhandled or unexpected tokens are found. + fn parse(parser: &mut Parser, level: u8) -> Result; +} + +#[derive(Debug)] +/// Error indicating unhandled or unexpected token encountered. 
+pub struct ParsingError { + line: usize, + token: Token, +} + +impl Error for ParsingError {} + +impl fmt::Display for ParsingError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", format!("line: {}\n{:?}", self.line, self.token)) + } +} diff --git a/src/tree.rs b/src/tree.rs index d99e0c6..e23b2b6 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -24,7 +24,7 @@ pub struct Gedcom { // should maybe store these by xref if available? impl Gedcom { - pub(crate) fn add(&mut self, data: Box) { + pub(crate) fn add(&mut self, data: &Box) { match data.get_type() { GedcomDataType::Family(family) => self.families.push(family), GedcomDataType::Header(header) => self.header = header, @@ -33,6 +33,7 @@ impl Gedcom { GedcomDataType::Repository(repo) => self.repositories.push(repo), GedcomDataType::Source(source) => self.sources.push(source), GedcomDataType::Submitter(submitter) => self.submitters.push(submitter), + GedcomDataType::Other(s) => println!("Unhandled datatype: {}", s), } } @@ -77,6 +78,7 @@ impl Gedcom { } /// Type of data that can be added to a Gedcom tree. 
+#[derive(Debug)] pub(crate) enum GedcomDataType { Family(Family), Header(Header), @@ -85,6 +87,7 @@ pub(crate) enum GedcomDataType { Repository(Repository), Source(Source), Submitter(Submitter), + Other(String), } pub(crate) trait GedcomData { diff --git a/src/types/address.rs b/src/types/address.rs index 7fe22ef..d1c536a 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -2,8 +2,11 @@ use serde::{Deserialize, Serialize}; use std::fmt; +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; + /// Physical address at which a fact occurs -#[derive(Default)] +#[derive(Default, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Address { pub value: Option, @@ -16,7 +19,7 @@ pub struct Address { pub country: Option, } -impl fmt::Debug for Address { +impl fmt::Display for Address { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut debug = f.debug_struct("Address"); @@ -32,3 +35,56 @@ impl fmt::Debug for Address { debug.finish() } } + +impl Parsable
for Address { + fn parse(parser: &mut Parser, level: u8) -> Result { + // skip ADDR tag + if let Token::Tag(_) = &parser.tokenizer.current_token { + parser.tokenizer.next_token(); + } + + let mut address = Address::default(); + let mut value = String::new(); + + // handle value on ADDR line + if let Token::LineValue(addr) = &parser.tokenizer.current_token { + value.push_str(addr); + parser.tokenizer.next_token(); + } + + loop { + if let Token::Level(cur_level) = parser.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" => { + value.push('\n'); + value.push_str(&parser.take_line_value()); + } + "ADR1" => address.adr1 = Some(parser.take_line_value()), + "ADR2" => address.adr2 = Some(parser.take_line_value()), + "ADR3" => address.adr3 = Some(parser.take_line_value()), + "CITY" => address.city = Some(parser.take_line_value()), + "STAE" => address.state = Some(parser.take_line_value()), + "POST" => address.post = Some(parser.take_line_value()), + "CTRY" => address.country = Some(parser.take_line_value()), + // TODO ParsingError + _ => panic!("{} Unhandled Address Tag: {}", parser.dbg(), tag), + }, + Token::Level(_) => parser.tokenizer.next_token(), + _ => panic!( + "Unhandled Address Token: {:?}", + parser.tokenizer.current_token + ), + } + } + + if &value != "" { + address.value = Some(value); + } + Ok(address) + } +} From 5926a7e44e01faec3fecf347e3806a44e40effc4 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sun, 28 Feb 2021 18:19:56 -0800 Subject: [PATCH 03/55] add Parsable trait to Event --- src/parser.rs | 45 ++++++++++++++--------------------------- src/tokenizer.rs | 9 ++++++++- src/types/event.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++- src/types/source.rs | 1 + 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 337fdfb..0f8c67f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -179,8 +179,7 
@@ impl<'a> Parser<'a> { "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { - let tag_clone = tag.clone(); - individual.add_event(self.parse_event(tag_clone.as_str(), level + 1)); + individual.add_event(self.parse_event(level + 1)); } "FAMC" | "FAMS" => { let tag_clone = tag.clone(); @@ -219,7 +218,7 @@ impl<'a> Parser<'a> { while self.tokenizer.current_token != Token::Level(level) { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MARR" => family.add_event(self.parse_event("MARR", level + 1)), + "MARR" => family.add_event(self.parse_event(level + 1)), "HUSB" => family.set_individual1(self.take_line_value()), "WIFE" => family.set_individual2(self.take_line_value()), "CHIL" => family.add_child(self.take_line_value()), @@ -248,12 +247,13 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), - "EVEN" => { - let events_recorded = self.take_line_value(); - let mut event = self.parse_event("OTHER", level + 2); - event.with_source_data(events_recorded); - source.data.add_event(event); - } + // TODO: fix weird date parsing. 
+ // "EVEN" => { + // let events_recorded = self.take_line_value(); + // let mut event = self.parse_event(level + 2); + // event.with_source_data(events_recorded); + // source.data.add_event(event); + // } "AGNC" => source.data.agency = Some(self.take_line_value()), "ABBR" => source.abbreviation = Some(self.take_continued_text(level + 1)), "TITL" => source.title = Some(self.take_continued_text(level + 1)), @@ -438,27 +438,11 @@ impl<'a> Parser<'a> { name } - fn parse_event(&mut self, tag: &str, level: u8) -> Event { - self.tokenizer.next_token(); - let mut event = Event::from_tag(tag); - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => event.date = Some(self.take_line_value()), - "PLAC" => event.place = Some(self.take_line_value()), - "SOUR" => event.add_citation(self.parse_citation(level + 1)), - _ => panic!("{} Unhandled Event Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), - } + fn parse_event(&mut self, level: u8) -> Event { + match Event::parse(self, level) { + Ok(event) => event, + Err(e) => panic!("event parsing fail: {:?}", e), } - event } /// Parses ADDR tag @@ -469,7 +453,8 @@ impl<'a> Parser<'a> { } } - fn parse_citation(&mut self, level: u8) -> SourceCitation { + // TODO Citation::parse + pub(crate) fn parse_citation(&mut self, level: u8) -> SourceCitation { let mut citation = SourceCitation { xref: self.take_line_value(), page: None, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 784a7de..6dad6c7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -5,7 +5,7 @@ use std::str::Chars; /// /// making use of [GEDCOM Standard Release 5.5.1](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf), p.11 /// `gedcom_line: level + delim + 
[optional_xref_ID] + tag + [optional_line_value] + terminator` -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Token { /// The `level`, denoting the depth within the tree Level(u8), @@ -100,6 +100,13 @@ impl<'a> Tokenizer<'a> { }; } + /// Like `next_token`, but returns a clone of the token you are popping. + pub fn take_token(&mut self) -> Token { + let current_token = self.current_token.clone(); + self.next_token(); + return current_token; + } + fn next_char(&mut self) { self.current_char = self.chars.next().unwrap_or('\0'); } diff --git a/src/types/event.rs b/src/types/event.rs index 549be52..2c475d7 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,4 +1,7 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; use crate::types::SourceCitation; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; use std::{fmt, string::ToString}; @@ -63,7 +66,10 @@ impl Event { "MARR" => EventType::Marriage, "RESI" => EventType::Residence, "OTHER" => EventType::Other, - _ => panic!("Unrecognized event tag: {}", tag), + _ => { + println!("Unrecognized event tag: {}", tag); + EventType::Other + } }; Event::new(etype) } @@ -78,6 +84,47 @@ impl Event { } } +impl Parsable for Event { + fn parse(parser: &mut Parser, level: u8) -> Result { + // extract current tag name to determine event type. 
+ let event_tag_token = parser.tokenizer.take_token(); + let tag: &str = if let Token::Tag(t) = &event_tag_token { + t.as_str().clone() + } else { + panic!( + "Expected event tag, found {:?}", + &parser.tokenizer.current_token + ); + }; + + let mut event = Event::from_tag(tag); + loop { + if let Token::Level(cur_level) = parser.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => event.date = Some(parser.take_line_value()), + "PLAC" => event.place = Some(parser.take_line_value()), + // TODO Citation::parse + "SOUR" => event.add_citation(parser.parse_citation(level + 1)), + _ => panic!("{} Unhandled Event Tag: {}", parser.dbg(), tag), + }, + Token::Level(_) => { + parser.tokenizer.next_token(); + } + _ => panic!( + "Unhandled Event Token: {:?}", + parser.tokenizer.current_token + ), + } + } + Ok(event) + } +} + impl std::fmt::Debug for Event { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let event_type = format!("{:?} Event", &self.event); diff --git a/src/types/source.rs b/src/types/source.rs index cfce2fd..b08536c 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,4 +1,5 @@ use crate::types::{Event, RepoCitation}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; From b58a287a09a28f3079e55ec1a9c16287768b4abb Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sun, 28 Feb 2021 21:01:16 -0800 Subject: [PATCH 04/55] implement Parsable for Header --- src/parser.rs | 60 +++--------------------------------------- src/types/header.rs | 63 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 0f8c67f..1b22676 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -47,7 +47,7 @@ impl<'a> Parser<'a> { if let Token::Tag(tag) = &self.tokenizer.current_token { match tag.as_str() { - "HEAD" => data.header = self.parse_header(), + "HEAD" => 
data.header = Header::parse(self, 0).unwrap(), "FAM" => data.add_family(self.parse_family(level, pointer)), "INDI" => data.add_individual(self.parse_individual(level, pointer)), "REPO" => data.add_repository(self.parse_repository(level, pointer)), @@ -84,60 +84,6 @@ impl<'a> Parser<'a> { data } - /// Parses HEAD top-level tag - fn parse_header(&mut self) -> Header { - // skip over HEAD tag name - self.tokenizer.next_token(); - - let mut header = Header::default(); - - // just skipping the header for now - while self.tokenizer.current_token != Token::Level(0) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - // TODO: CHAR.VERS - "CHAR" => header.encoding = Some(self.take_line_value()), - "CORP" => header.corporation = Some(self.take_line_value()), - "COPR" => header.copyright = Some(self.take_line_value()), - "DATE" => header.date = Some(self.take_line_value()), - "DEST" => header.add_destination(self.take_line_value()), - "LANG" => header.language = Some(self.take_line_value()), - "FILE" => header.filename = Some(self.take_line_value()), - "NOTE" => header.note = Some(self.take_continued_text(1)), - "SUBM" => header.submitter_tag = Some(self.take_line_value()), - "SUBN" => header.submission_tag = Some(self.take_line_value()), - "TIME" => { - let time = self.take_line_value(); - // assuming subtag of DATE - if let Some(date) = header.date { - let mut datetime = String::new(); - datetime.push_str(&date); - datetime.push_str(" "); - datetime.push_str(&time); - header.date = Some(datetime); - } else { - panic!("Expected TIME to be under DATE in header."); - } - } - "GEDC" => { - header = self.parse_gedcom_data(header); - } - // TODO: HeaderSource - "SOUR" => { - println!("WARNING: Skipping header source."); - while self.tokenizer.current_token != Token::Level(1) { - self.tokenizer.next_token(); - } - } - _ => panic!("{} Unhandled Header Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => 
panic!("Unhandled Header Token: {:?}", self.tokenizer.current_token), - } - } - header - } - /// Parses SUBM top-level tag fn parse_submitter(&mut self, level: u8, xref: Option) -> Submitter { // skip over SUBM tag name @@ -307,7 +253,7 @@ impl<'a> Parser<'a> { } /// Handle parsing GEDC tag - fn parse_gedcom_data(&mut self, mut header: Header) -> Header { + pub(crate) fn parse_gedcom_data(&mut self, mut header: Header) -> Header { // skip GEDC tag self.tokenizer.next_token(); @@ -482,7 +428,7 @@ impl<'a> Parser<'a> { /// Takes the value of the current line including handling /// multi-line values from CONT & CONC tags. - fn take_continued_text(&mut self, level: u8) -> String { + pub(crate) fn take_continued_text(&mut self, level: u8) -> String { let mut value = self.take_line_value(); loop { diff --git a/src/types/header.rs b/src/types/header.rs index cfa2750..192a3aa 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,4 +1,7 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; use crate::types::Source; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -35,3 +38,63 @@ impl Header { // name: Option, // coroporation: // } + +impl Parsable
for Header { + /// Parses HEAD top-level tag + fn parse(parser: &mut Parser, level: u8) -> Result { + // skip over HEAD tag name + parser.tokenizer.next_token(); + + let mut header = Header::default(); + + // just skipping the header for now + while parser.tokenizer.current_token != Token::Level(level) { + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + // TODO: CHAR.VERS - version + "CHAR" => header.encoding = Some(parser.take_line_value()), + "CORP" => header.corporation = Some(parser.take_line_value()), + "COPR" => header.copyright = Some(parser.take_line_value()), + "DATE" => header.date = Some(parser.take_line_value()), + "DEST" => header.add_destination(parser.take_line_value()), + "LANG" => header.language = Some(parser.take_line_value()), + "FILE" => header.filename = Some(parser.take_line_value()), + "NOTE" => header.note = Some(parser.take_continued_text(1)), + "SUBM" => header.submitter_tag = Some(parser.take_line_value()), + "SUBN" => header.submission_tag = Some(parser.take_line_value()), + "TIME" => { + let time = parser.take_line_value(); + // assuming subtag of DATE + if let Some(date) = header.date { + let mut datetime = String::new(); + datetime.push_str(&date); + datetime.push_str(" "); + datetime.push_str(&time); + header.date = Some(datetime); + } else { + panic!("Expected TIME to be under DATE in header."); + } + } + "GEDC" => { + header = parser.parse_gedcom_data(header); + } + // TODO: HeaderSource + "SOUR" => { + println!("WARNING: Skipping header source."); + while parser.tokenizer.current_token != Token::Level(1) { + parser.tokenizer.next_token(); + } + } + _ => panic!("{} Unhandled Header Tag: {}", parser.dbg(), tag), + }, + Token::Level(_) => parser.tokenizer.next_token(), + _ => panic!( + "Unhandled Header Token: {:?}", + parser.tokenizer.current_token + ), + } + } + + Ok(header) + } +} From dc979508a99be4a562f2f0c20f4180f0a1c50f40 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sun, 28 Feb 2021 
21:19:15 -0800 Subject: [PATCH 05/55] add Parsable to Family --- src/parser.rs | 28 +++------------------ src/types/address.rs | 6 ++--- src/types/family.rs | 58 ++++++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 43 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 1b22676..55a7729 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -48,7 +48,9 @@ impl<'a> Parser<'a> { if let Token::Tag(tag) = &self.tokenizer.current_token { match tag.as_str() { "HEAD" => data.header = Header::parse(self, 0).unwrap(), - "FAM" => data.add_family(self.parse_family(level, pointer)), + "FAM" => { + data.add_family(Family::parse(self, level).unwrap().with_xref(pointer)) + } "INDI" => data.add_individual(self.parse_individual(level, pointer)), "REPO" => data.add_repository(self.parse_repository(level, pointer)), "SOUR" => data.add_source(self.parse_source(level, pointer)), @@ -155,30 +157,6 @@ impl<'a> Parser<'a> { individual } - /// Parses FAM top-level tag - fn parse_family(&mut self, level: u8, xref: Option) -> Family { - // skip over FAM tag name - self.tokenizer.next_token(); - let mut family = Family::new(xref); - - while self.tokenizer.current_token != Token::Level(level) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MARR" => family.add_event(self.parse_event(level + 1)), - "HUSB" => family.set_individual1(self.take_line_value()), - "WIFE" => family.set_individual2(self.take_line_value()), - "CHIL" => family.add_child(self.take_line_value()), - _ => panic!("{} Unhandled Family Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Family Token: {:?}", self.tokenizer.current_token), - } - } - - // println!("found family:\n{:#?}", family); - family - } - fn parse_source(&mut self, level: u8, xref: Option) -> Source { // skip SOUR tag self.tokenizer.next_token(); diff --git a/src/types/address.rs b/src/types/address.rs index d1c536a..27b0eb4 100644 --- 
a/src/types/address.rs +++ b/src/types/address.rs @@ -1,10 +1,10 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; use std::fmt; -use crate::parser::{Parsable, Parser, ParsingError}; -use crate::tokenizer::Token; - /// Physical address at which a fact occurs #[derive(Default, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] diff --git a/src/types/family.rs b/src/types/family.rs index c666ac4..93a3d46 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,4 +1,7 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; use crate::types::{event::HasEvents, Event}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -8,28 +11,22 @@ type Xref = String; /// /// This data representation understands that HUSB & WIFE are just poorly-named /// pointers to individuals. no gender "validating" is done on parse. -#[derive(Debug)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Family { pub xref: Option, - pub individual1: Option, // mapped from HUSB - pub individual2: Option, // mapped from WIFE + /// mapped from HUSB + pub individual1: Option, + /// mapped from WIFE + pub individual2: Option, pub children: Vec, pub num_children: Option, events: Vec, } impl Family { - #[must_use] - pub fn new(xref: Option) -> Family { - Family { - xref, - individual1: None, - individual2: None, - children: Vec::new(), - num_children: None, - events: Vec::new(), - } + pub fn add_child(&mut self, xref: Xref) { + self.children.push(xref); } pub fn set_individual1(&mut self, xref: Xref) { @@ -46,8 +43,10 @@ impl Family { }; } - pub fn add_child(&mut self, xref: Xref) { - self.children.push(xref); + #[must_use] + pub fn with_xref(mut self, xref: Option) -> Family { + self.xref = xref; + self } } @@ -65,3 +64,32 @@ impl HasEvents for Family { self.events.clone() } } + 
+impl Parsable for Family { + /// Parses FAM top-level tag + fn parse(parser: &mut Parser, level: u8) -> Result { + // skip over FAM tag name + parser.tokenizer.next_token(); + let mut family = Family::default(); + + while parser.tokenizer.current_token != Token::Level(level) { + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MARR" => family.add_event(Event::parse(parser, level + 1).unwrap()), + "HUSB" => family.set_individual1(parser.take_line_value()), + "WIFE" => family.set_individual2(parser.take_line_value()), + "CHIL" => family.add_child(parser.take_line_value()), + _ => panic!("{} Unhandled Family Tag: {}", parser.dbg(), tag), + }, + Token::Level(_) => parser.tokenizer.next_token(), + _ => panic!( + "Unhandled Family Token: {:?}", + parser.tokenizer.current_token + ), + } + } + + // println!("found family:\n{:#?}", family); + Ok(family) + } +} From 5032f170fb0d16f276c6c68d64483040221d7b70 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Thu, 18 Mar 2021 22:00:40 -0700 Subject: [PATCH 06/55] add handle_unexpected_token debug func --- src/parser.rs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 6473047..c84ae0e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -130,7 +130,7 @@ impl<'a> Parser<'a> { _ => panic!("{} Unhandled Header Tag: {}", self.dbg(), tag), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Header Token: {:?}", self.tokenizer.current_token), + _ => self.handle_unexpected_token(1, "HEAD"), } } header @@ -325,11 +325,7 @@ impl<'a> Parser<'a> { _ => panic!("{} Unhandled GEDC Tag: {}", self.dbg(), tag), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "{} Unexpected GEDC Token: {:?}", - self.dbg(), - &self.tokenizer.current_token - ), + _ => self.handle_unexpected_token(2, "GEDC"), } } header @@ -586,6 +582,27 @@ impl<'a> Parser<'a> { value } + fn handle_unexpected_token(&mut 
self, level: u8, base_tag: &str) { + println!( + "{} Unexpected {} Token: {:?}", + self.dbg(), + base_tag, + &self.tokenizer.current_token + ); + self.skip_block(level); + } + + fn skip_block(&mut self, level: u8) { + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + self.tokenizer.next_token(); + } + } + /// Debug function displaying GEDCOM line number of error message. fn dbg(&self) -> String { format!("line {}:", self.tokenizer.line) From ebc5339f35a11e66f2bc33e577aa80ad16506925 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Thu, 18 Mar 2021 22:31:30 -0700 Subject: [PATCH 07/55] skip some unhandled tags, don't panic! --- src/parser.rs | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index c84ae0e..3c76237 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -191,7 +191,7 @@ impl<'a> Parser<'a> { self.tokenizer.next_token(); // DATE tag individual.last_updated = Some(self.take_line_value()); } - _ => panic!("{} Unhandled Individual Tag: {}", self.dbg(), tag), + _ => self.handle_unknown_tag(level + 1, "Individual"), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); @@ -408,10 +408,11 @@ impl<'a> Parser<'a> { fn parse_name(&mut self, level: u8) -> Name { let mut name = Name::default(); name.value = Some(self.take_line_value()); + let mut cur_level = level; loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if let Token::Level(new_level) = self.tokenizer.current_token { + if new_level <= cur_level { break; } } @@ -422,9 +423,12 @@ impl<'a> Parser<'a> { "NSFX" => name.suffix = Some(self.take_line_value()), "SPFX" => name.surname_prefix = Some(self.take_line_value()), "SURN" => name.surname = Some(self.take_line_value()), - _ => panic!("{} Unhandled Name Tag: {}", self.dbg(), tag), + _ => self.handle_unknown_tag(cur_level, "Name"), }, - 
Token::Level(_) => self.tokenizer.next_token(), + Token::Level(_) => { + cur_level += 1; + self.tokenizer.next_token() + } _ => panic!("Unhandled Name Token: {:?}", self.tokenizer.current_token), } } @@ -446,7 +450,7 @@ impl<'a> Parser<'a> { "DATE" => event.date = Some(self.take_line_value()), "PLAC" => event.place = Some(self.take_line_value()), "SOUR" => event.add_citation(self.parse_citation(level + 1)), - _ => panic!("{} Unhandled Event Tag: {}", self.dbg(), tag), + _ => self.handle_unknown_tag(level + 1, "Event"), }, Token::Level(_) => self.tokenizer.next_token(), _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), @@ -518,13 +522,10 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PAGE" => citation.page = Some(self.take_line_value()), - _ => panic!("{} Unhandled Citation Tag: {}", self.dbg(), tag), + _ => self.handle_unknown_tag(level + 1, "Citation"), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Citation Token: {:?}", - self.tokenizer.current_token - ), + _ => self.handle_unexpected_token(level + 1, "Citation"), } } citation @@ -582,10 +583,22 @@ impl<'a> Parser<'a> { value } + fn handle_unknown_tag(&mut self, level: u8, parent_name: &str) { + if let Token::Tag(tag) = &self.tokenizer.current_token { + println!( + "{} Unhandled {} Tag: {}", + self.dbg_lvl(level), + parent_name, + tag + ); + } + self.skip_block(level); + } + fn handle_unexpected_token(&mut self, level: u8, base_tag: &str) { println!( - "{} Unexpected {} Token: {:?}", - self.dbg(), + "{} Unhandled {} Token: {:?}", + self.dbg_lvl(level), base_tag, &self.tokenizer.current_token ); @@ -603,6 +616,10 @@ impl<'a> Parser<'a> { } } + fn dbg_lvl(&self, level: u8) -> String { + format!("line {}, level {}:", self.tokenizer.line, level) + } + /// Debug function displaying GEDCOM line number of error message. 
fn dbg(&self) -> String { format!("line {}:", self.tokenizer.line) From e9f76615f6b60394ea72795c7c95f9bf5c807b7f Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 19 Mar 2021 18:54:42 -0700 Subject: [PATCH 08/55] better debug function names & more graceful skips --- src/parser.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 3c76237..0d48ffe 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -127,7 +127,7 @@ impl<'a> Parser<'a> { self.tokenizer.next_token(); } } - _ => panic!("{} Unhandled Header Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(1, "Header"), }, Token::Level(_) => self.tokenizer.next_token(), _ => self.handle_unexpected_token(1, "HEAD"), @@ -150,16 +150,12 @@ impl<'a> Parser<'a> { submitter.address = Some(self.parse_address(level + 1)); } "PHON" => submitter.phone = Some(self.take_line_value()), - _ => panic!("{} Unhandled Submitter Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(level + 1, "Submitter"), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Submitter Token: {:?}", - self.tokenizer.current_token - ), + _ => self.handle_unexpected_token(level + 1, "SUBM"), } } - // println!("found submitter:\n{:#?}", submitter); submitter } @@ -191,7 +187,7 @@ impl<'a> Parser<'a> { self.tokenizer.next_token(); // DATE tag individual.last_updated = Some(self.take_line_value()); } - _ => self.handle_unknown_tag(level + 1, "Individual"), + _ => self.skip_current_tag(level + 1, "Individual"), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); @@ -423,7 +419,7 @@ impl<'a> Parser<'a> { "NSFX" => name.suffix = Some(self.take_line_value()), "SPFX" => name.surname_prefix = Some(self.take_line_value()), "SURN" => name.surname = Some(self.take_line_value()), - _ => self.handle_unknown_tag(cur_level, "Name"), + _ => self.skip_current_tag(cur_level, "Name"), }, Token::Level(_) => { cur_level += 1; @@ -450,7 +446,7 @@ impl<'a> 
Parser<'a> { "DATE" => event.date = Some(self.take_line_value()), "PLAC" => event.place = Some(self.take_line_value()), "SOUR" => event.add_citation(self.parse_citation(level + 1)), - _ => self.handle_unknown_tag(level + 1, "Event"), + _ => self.skip_current_tag(level + 1, "Event"), }, Token::Level(_) => self.tokenizer.next_token(), _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), @@ -522,7 +518,7 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PAGE" => citation.page = Some(self.take_line_value()), - _ => self.handle_unknown_tag(level + 1, "Citation"), + _ => self.skip_current_tag(level + 1, "Citation"), }, Token::Level(_) => self.tokenizer.next_token(), _ => self.handle_unexpected_token(level + 1, "Citation"), @@ -583,7 +579,7 @@ impl<'a> Parser<'a> { value } - fn handle_unknown_tag(&mut self, level: u8, parent_name: &str) { + fn skip_current_tag(&mut self, level: u8, parent_name: &str) { if let Token::Tag(tag) = &self.tokenizer.current_token { println!( "{} Unhandled {} Tag: {}", @@ -591,6 +587,8 @@ impl<'a> Parser<'a> { parent_name, tag ); + } else { + panic!("Expected tag, found {:?}", &self.tokenizer.current_token); } self.skip_block(level); } From a6334b1d2aa2f57ae0d601bc48386f411a6e8cdd Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 19 Mar 2021 19:07:00 -0700 Subject: [PATCH 09/55] handle Y line value for events --- src/parser.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index 0d48ffe..1d73f35 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -449,6 +449,14 @@ impl<'a> Parser<'a> { _ => self.skip_current_tag(level + 1, "Event"), }, Token::Level(_) => self.tokenizer.next_token(), + // some events are also bool like w/ Y values, apparently? 
+ Token::LineValue(v) => { + if v.as_str() != "Y" { + panic!("{} Surprise value {} as event value", self.dbg(), v); + } + // just skip Y's + self.tokenizer.next_token(); + } _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), } } From b847200b1cf91aa0ad3a45c943e4c51e57879350 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 19 Mar 2021 20:10:07 -0700 Subject: [PATCH 10/55] handle all the events! :tada: :beers: --- src/parser.rs | 32 ++++++++++++++++++---- src/types/event.rs | 68 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 82 insertions(+), 18 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 1d73f35..4648140 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,8 +4,8 @@ use std::{panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::GedcomData; use crate::types::{ - event::HasEvents, Address, CustomData, Event, Family, FamilyLink, Gender, Header, Individual, - Name, RepoCitation, Repository, Source, SourceCitation, Submitter, + event::HasEvents, Address, CustomData, Event, EventType, Family, FamilyLink, Gender, Header, + Individual, Name, RepoCitation, Repository, Source, SourceCitation, Submitter, }; /// The Gedcom parser that converts the token list into a data structure @@ -433,11 +433,27 @@ impl<'a> Parser<'a> { } fn parse_event(&mut self, tag: &str, level: u8) -> Event { + // Events begin with either EVEN , or a type tag. 
+ let type_tag: &str = if tag == "EVEN" { + println!("{:?}", &self.tokenizer.current_token); + if let Token::LineValue(v) = &self.tokenizer.current_token { + v + } else { + // if there's no line value, there's probably a TYPE tag + "OTHER" + } + } else { + tag + }; + + println!("Event type: {}", &type_tag); + let mut event = Event::from_tag(&type_tag); + self.tokenizer.next_token(); - let mut event = Event::from_tag(tag); + loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if let Token::Level(cur_level) = &self.tokenizer.current_token { + if cur_level <= &level { break; } } @@ -445,6 +461,10 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "DATE" => event.date = Some(self.take_line_value()), "PLAC" => event.place = Some(self.take_line_value()), + "TYPE" => { + let type_tag = self.take_line_value(); + event.event = EventType::SourceData(type_tag); + } "SOUR" => event.add_citation(self.parse_citation(level + 1)), _ => self.skip_current_tag(level + 1, "Event"), }, @@ -457,7 +477,7 @@ impl<'a> Parser<'a> { // just skip Y's self.tokenizer.next_token(); } - _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), + _ => panic!("Unhandled Event Token: {:?}", &self.tokenizer.current_token), } } event diff --git a/src/types/event.rs b/src/types/event.rs index 549be52..bd66861 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -8,12 +8,29 @@ use std::{fmt, string::ToString}; #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub enum EventType { Adoption, + Baptism, + BarMitzvah, + BasMitzvah, Birth, + Blessing, Burial, - Death, + Census, Christening, + ChristeningAdult, + Confirmation, + Cremation, + Death, + Emigration, + FirstCommunion, + Graduation, + Immigration, Marriage, + Naturalization, + Ordination, + Probate, Residence, + Retirement, + Will, SourceData(String), // "Other" is used to construct an event without requiring an explicit event type @@ -26,6 +43,43 @@ 
impl ToString for EventType { } } +impl EventType { + #[must_use] + pub fn from_tag(tag: &str) -> EventType { + match tag { + "ADOP" => EventType::Adoption, + "BAPM" => EventType::Baptism, + "BARM" => EventType::BarMitzvah, + "BASM" => EventType::BasMitzvah, + "BLES" => EventType::Blessing, + "BIRT" => EventType::Birth, + "BURI" => EventType::Burial, + "CENS" => EventType::Census, + "CHR" => EventType::Christening, + "CHRA" => EventType::ChristeningAdult, + "CONF" => EventType::Confirmation, + "CREM" => EventType::Cremation, + "DEAT" => EventType::Death, + "EMIG" => EventType::Emigration, + "FCOM" => EventType::FirstCommunion, + "GRAD" => EventType::Graduation, + "IMMI" => EventType::Immigration, + "MARR" => EventType::Marriage, + "NATU" => EventType::Naturalization, + "ORDN" => EventType::Ordination, + "PROB" => EventType::Probate, + "RESI" => EventType::Residence, + "RETI" => EventType::Retirement, + "WILL" => EventType::Will, + + "OTHER" => EventType::Other, + + "EVEN" => panic!("EVEN passed as event tag instead of value."), + _ => panic!("Unrecognized event tag: {}", tag), + } + } +} + /// Event fact #[derive(Clone)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -54,17 +108,7 @@ impl Event { #[must_use] pub fn from_tag(tag: &str) -> Event { - let etype = match tag { - "ADOP" => EventType::Adoption, - "BIRT" => EventType::Birth, - "BURI" => EventType::Burial, - "CHR" => EventType::Christening, - "DEAT" => EventType::Death, - "MARR" => EventType::Marriage, - "RESI" => EventType::Residence, - "OTHER" => EventType::Other, - _ => panic!("Unrecognized event tag: {}", tag), - }; + let etype = EventType::from_tag(tag); Event::new(etype) } From 8875d60c0b9af7b18a3567ca1ed56d29dc38cd98 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 19 Mar 2021 20:16:38 -0700 Subject: [PATCH 11/55] successfully parse allged.ged with only warnings! 
--- src/parser.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 4648140..67f42fc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,8 +4,8 @@ use std::{panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::GedcomData; use crate::types::{ - event::HasEvents, Address, CustomData, Event, EventType, Family, FamilyLink, Gender, Header, - Individual, Name, RepoCitation, Repository, Source, SourceCitation, Submitter, + event::HasEvents, Address, CustomData, Event, Family, FamilyLink, Gender, Header, Individual, + Name, RepoCitation, Repository, Source, SourceCitation, Submitter, }; /// The Gedcom parser that converts the token list into a data structure @@ -217,10 +217,10 @@ impl<'a> Parser<'a> { "HUSB" => family.set_individual1(self.take_line_value()), "WIFE" => family.set_individual2(self.take_line_value()), "CHIL" => family.add_child(self.take_line_value()), - _ => panic!("{} Unhandled Family Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(level + 1, "Family"), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Family Token: {:?}", self.tokenizer.current_token), + _ => self.handle_unexpected_token(level + 1, "FAM"), } } @@ -242,6 +242,7 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), + // TODO: cleanup to just use parse_event "EVEN" => { let events_recorded = self.take_line_value(); let mut event = self.parse_event("OTHER", level + 2); @@ -252,10 +253,10 @@ impl<'a> Parser<'a> { "ABBR" => source.abbreviation = Some(self.take_continued_text(level + 1)), "TITL" => source.title = Some(self.take_continued_text(level + 1)), "REPO" => source.add_repo_citation(self.parse_repo_citation(level + 1)), - _ => panic!("{} Unhandled Source Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(level + 1, "Source"), }, Token::Level(_) => 
self.tokenizer.next_token(), - _ => panic!("Unhandled Source Token: {:?}", self.tokenizer.current_token), + _ => self.handle_unexpected_token(level + 1, "SOUR"), } } @@ -340,7 +341,7 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PEDI" => link.set_pedigree(self.take_line_value().as_str()), - _ => panic!("{} Unhandled FamilyLink Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(level + 1, "FamilyLink"), }, Token::Level(_) => self.tokenizer.next_token(), _ => panic!( @@ -461,10 +462,7 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "DATE" => event.date = Some(self.take_line_value()), "PLAC" => event.place = Some(self.take_line_value()), - "TYPE" => { - let type_tag = self.take_line_value(); - event.event = EventType::SourceData(type_tag); - } + "TYPE" => event.with_source_data(self.take_line_value()), "SOUR" => event.add_citation(self.parse_citation(level + 1)), _ => self.skip_current_tag(level + 1, "Event"), }, From 62ddf9a925d571dd28a65b6f931b99c91fe3e84b Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Fri, 19 Mar 2021 21:14:10 -0700 Subject: [PATCH 12/55] allow repeat event facts for family --- src/parser.rs | 31 ++++++++++--------------------- src/types/family.rs | 6 ------ 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 67f42fc..378b98c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -53,8 +53,8 @@ impl<'a> Parser<'a> { "SUBM" => data.add_submitter(self.parse_submitter(level, pointer)), "TRLR" => break, _ => { - println!("{} Unhandled tag {}", self.dbg(), tag); - self.tokenizer.next_token(); + println!("{} Unhandled top-level data {}", self.dbg(), tag); + self.skip_block(level) } }; } else if let Token::CustomTag(tag) = &self.tokenizer.current_token { @@ -66,9 +66,7 @@ impl<'a> Parser<'a> { self.dbg(), custom_data ); - while self.tokenizer.current_token != Token::Level(0) { - self.tokenizer.next_token(); - } + 
self.skip_block(level); } else { println!( "{} Unhandled token {:?}", @@ -200,7 +198,7 @@ impl<'a> Parser<'a> { ), } } - // println!("found individual:\n{:#?}", individual); + individual } @@ -224,7 +222,6 @@ impl<'a> Parser<'a> { } } - // println!("found family:\n{:#?}", family); family } @@ -260,7 +257,6 @@ impl<'a> Parser<'a> { } } - // println!("found source:\n{:#?}", source); source } @@ -283,16 +279,13 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "NAME" => repo.name = Some(self.take_line_value()), "ADDR" => repo.address = Some(self.parse_address(level + 1)), - _ => panic!("{} Unhandled Repository Tag: {}", self.dbg(), tag), + _ => self.skip_current_tag(level + 1, "Repository"), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Repository Token: {:?}", - self.tokenizer.current_token - ), + _ => self.handle_unexpected_token(level + 1, "REPO"), } } - // println!("found repositiory:\n{:#?}", repo); + repo } @@ -344,10 +337,7 @@ impl<'a> Parser<'a> { _ => self.skip_current_tag(level + 1, "FamilyLink"), }, Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled FamilyLink Token: {:?}", - self.tokenizer.current_token - ), + _ => self.handle_unexpected_token(level + 1, "FamilyLink"), } } @@ -447,7 +437,6 @@ impl<'a> Parser<'a> { tag }; - println!("Event type: {}", &type_tag); let mut event = Event::from_tag(&type_tag); self.tokenizer.next_token(); @@ -467,7 +456,7 @@ impl<'a> Parser<'a> { _ => self.skip_current_tag(level + 1, "Event"), }, Token::Level(_) => self.tokenizer.next_token(), - // some events are also bool like w/ Y values, apparently? + // some events are also bool-like w/ Y values, apparently? 
Token::LineValue(v) => { if v.as_str() != "Y" { panic!("{} Surprise value {} as event value", self.dbg(), v); @@ -475,7 +464,7 @@ impl<'a> Parser<'a> { // just skip Y's self.tokenizer.next_token(); } - _ => panic!("Unhandled Event Token: {:?}", &self.tokenizer.current_token), + _ => self.handle_unexpected_token(level + 1, "Event"), } } event diff --git a/src/types/family.rs b/src/types/family.rs index c666ac4..241fca7 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -53,12 +53,6 @@ impl Family { impl HasEvents for Family { fn add_event(&mut self, event: Event) -> () { - let event_type = &event.event; - for e in &self.events { - if &e.event == event_type { - panic!("Family already has a {:?} event", e.event); - } - } self.events.push(event); } fn events(&self) -> Vec { From ed3adef3b1df6455fc5ac2da9dffb8364c1aa8ad Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 00:12:46 -0700 Subject: [PATCH 13/55] refactor out family_link to own file --- src/types/family_link.rs | 46 ++++++++++++++++++++++++++++++++++++++++ src/types/individual.rs | 44 +------------------------------------- src/types/mod.rs | 3 +++ 3 files changed, 50 insertions(+), 43 deletions(-) create mode 100644 src/types/family_link.rs diff --git a/src/types/family_link.rs b/src/types/family_link.rs new file mode 100644 index 0000000..7a847cb --- /dev/null +++ b/src/types/family_link.rs @@ -0,0 +1,46 @@ +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +type Xref = String; + +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct FamilyLink(pub Xref, pub Relation, pub Option); + +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub enum Relation { + Spouse, + Child, +} + +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub enum Pedigree { + Adopted, + Birth, + Foster, + Sealing, +} + +impl FamilyLink { + #[must_use] + pub fn new(xref: Xref, tag: &str) -> 
FamilyLink { + let link_type = match tag { + "FAMC" => Relation::Child, + "FAMS" => Relation::Spouse, + _ => panic!("Unrecognized family type tag: {}", tag), + }; + FamilyLink(xref, link_type, None) + } + + pub fn set_pedigree(&mut self, pedigree_text: &str) { + self.2 = match pedigree_text.to_lowercase().as_str() { + "adopted" => Some(Pedigree::Adopted), + "birth" => Some(Pedigree::Birth), + "foster" => Some(Pedigree::Foster), + "sealing" => Some(Pedigree::Sealing), + _ => panic!("Unrecognized family link pedigree: {}", pedigree_text), + }; + } +} diff --git a/src/types/individual.rs b/src/types/individual.rs index bf44314..bbdbcc0 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,4 +1,4 @@ -use crate::types::{event::HasEvents, CustomData, Event}; +use crate::types::{event::HasEvents, CustomData, Event, FamilyLink}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -69,48 +69,6 @@ pub enum Gender { Unknown, } -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -enum FamilyLinkType { - Spouse, - Child, -} - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -enum Pedigree { - Adopted, - Birth, - Foster, - Sealing, -} - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct FamilyLink(Xref, FamilyLinkType, Option); - -impl FamilyLink { - #[must_use] - pub fn new(xref: Xref, tag: &str) -> FamilyLink { - let link_type = match tag { - "FAMC" => FamilyLinkType::Child, - "FAMS" => FamilyLinkType::Spouse, - _ => panic!("Unrecognized family type tag: {}", tag), - }; - FamilyLink(xref, link_type, None) - } - - pub fn set_pedigree(&mut self, pedigree_text: &str) { - self.2 = match pedigree_text.to_lowercase().as_str() { - "adopted" => Some(Pedigree::Adopted), - "birth" => Some(Pedigree::Birth), - "foster" => Some(Pedigree::Foster), - "sealing" => Some(Pedigree::Sealing), - _ => panic!("Unrecognized family link pedigree: {}", pedigree_text), - 
}; - } -} - #[derive(Debug, Default, PartialEq)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Name { diff --git a/src/types/mod.rs b/src/types/mod.rs index 07dc0e5..0aeab88 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -25,6 +25,9 @@ pub use individual::*; mod family; pub use family::*; +mod family_link; +pub use family_link::FamilyLink; + mod submitter; pub use submitter::*; From 95cdb7fbf688c2711e33d6d7a07a15075de13f9c Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 12:18:26 -0700 Subject: [PATCH 14/55] impl more Parseable types --- .gitignore | 1 + src/bin.rs | 4 +- src/lib.rs | 4 +- src/parser.rs | 171 +++++++-------------------------------- src/tokenizer.rs | 1 - src/types/event.rs | 1 - src/types/family_link.rs | 34 ++++++++ src/types/individual.rs | 126 +++++++++++++++++++++++++---- 8 files changed, 178 insertions(+), 164 deletions(-) diff --git a/.gitignore b/.gitignore index ea8c4bf..0592392 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.DS_Store diff --git a/src/bin.rs b/src/bin.rs index f9c59a6..a8797bd 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,5 +1,5 @@ use gedcom::parser::Parser; -use gedcom::GedcomData; +use gedcom::Gedcom; use std::env; use std::fs; use std::path::PathBuf; @@ -18,7 +18,7 @@ fn main() { usage(""); } - let data: GedcomData; + let data: Gedcom; if let Ok(contents) = read_relative(filename) { let mut parser = Parser::new(contents.chars()); diff --git a/src/lib.rs b/src/lib.rs index 70da3c4..cc9ec86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,11 +27,11 @@ pub mod tokenizer; pub mod types; mod tree; -pub use tree::Gedcom as GedcomData; +pub use tree::Gedcom; #[must_use] /// Helper function for converting GEDCOM file content stream to parsed data. 
-pub fn parse(content: std::str::Chars) -> GedcomData { +pub fn parse(content: std::str::Chars) -> Gedcom { let mut p = parser::Parser::new(content); p.parse_record() } diff --git a/src/parser.rs b/src/parser.rs index c5e459d..efe12b0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,8 +4,8 @@ use std::{error::Error, fmt, panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::Gedcom; use crate::types::{ - event::HasEvents, Address, CustomData, Event, Family, FamilyLink, Gender, Header, Individual, - Name, RepoCitation, Repository, Source, SourceCitation, Submitter, + Address, CustomData, Family, Header, Individual, RepoCitation, Repository, Source, + SourceCitation, Submitter, }; /// The Gedcom parser that converts the token list into a data structure @@ -20,8 +20,15 @@ impl<'a> Parser<'a> { #[must_use] pub fn new(chars: Chars<'a>) -> Parser { let mut tokenizer = Tokenizer::new(chars); - tokenizer.next_token(); - Parser { tokenizer } + if tokenizer.current_token == Token::None { + tokenizer.next_token(); + Parser { tokenizer } + } else { + panic!( + "Unexpected starting token, found {:?}", + &tokenizer.current_token + ); + } } /// Does the actual parsing of the record. 
@@ -51,7 +58,11 @@ impl<'a> Parser<'a> { "FAM" => { data.add_family(Family::parse(self, level).unwrap().with_xref(pointer)) } - "INDI" => data.add_individual(self.parse_individual(level, pointer)), + "INDI" => { + let mut individual = Individual::parse(self, level).unwrap(); + individual.xref = pointer; + data.add_individual(individual); + } "REPO" => data.add_repository(self.parse_repository(level, pointer)), "SOUR" => data.add_source(self.parse_source(level, pointer)), "SUBM" => data.add_submitter(self.parse_submitter(level, pointer)), @@ -107,50 +118,6 @@ impl<'a> Parser<'a> { submitter } - /// Parses INDI top-level tag - fn parse_individual(&mut self, level: u8, xref: Option) -> Individual { - // skip over INDI tag name - self.tokenizer.next_token(); - let mut individual = Individual::new(xref); - - while self.tokenizer.current_token != Token::Level(level) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => individual.name = Some(self.parse_name(level + 1)), - "SEX" => individual.sex = self.parse_gender(), - "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" - | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" - | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { - individual.add_event(self.parse_event(level + 1)); - } - "FAMC" | "FAMS" => { - let tag_clone = tag.clone(); - individual - .add_family(self.parse_family_link(tag_clone.as_str(), level + 1)); - } - "CHAN" => { - // assuming it always only has a single DATE subtag - self.tokenizer.next_token(); // level - self.tokenizer.next_token(); // DATE tag - individual.last_updated = Some(self.take_line_value()); - } - _ => self.skip_current_tag(level + 1, "Individual"), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - individual.add_custom_data(self.parse_custom_tag(tag_clone)) - } - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Individual Token: {:?}", - 
self.tokenizer.current_token - ), - } - } - - individual - } - fn parse_source(&mut self, level: u8, xref: Option) -> Source { // skip SOUR tag self.tokenizer.next_token(); @@ -215,7 +182,7 @@ impl<'a> Parser<'a> { repo } - fn parse_custom_tag(&mut self, tag: String) -> CustomData { + pub(crate) fn parse_custom_tag(&mut self, tag: String) -> CustomData { let value = self.take_line_value(); CustomData { tag, value } } @@ -247,29 +214,6 @@ impl<'a> Parser<'a> { header } - fn parse_family_link(&mut self, tag: &str, level: u8) -> FamilyLink { - let xref = self.take_line_value(); - let mut link = FamilyLink::new(xref, tag); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PEDI" => link.set_pedigree(self.take_line_value().as_str()), - _ => self.skip_current_tag(level + 1, "FamilyLink"), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => self.handle_unexpected_token(level + 1, "FamilyLink"), - } - } - - link - } - fn parse_repo_citation(&mut self, level: u8) -> RepoCitation { let xref = self.take_line_value(); let mut citation = RepoCitation { @@ -297,65 +241,6 @@ impl<'a> Parser<'a> { citation } - fn parse_gender(&mut self) -> Gender { - self.tokenizer.next_token(); - let gender: Gender; - if let Token::LineValue(gender_string) = &self.tokenizer.current_token { - gender = match gender_string.as_str() { - "M" => Gender::Male, - "F" => Gender::Female, - "N" => Gender::Nonbinary, - "U" => Gender::Unknown, - _ => panic!("{} Unknown gender value {}", self.dbg(), gender_string), - }; - } else { - panic!( - "Expected gender LineValue, found {:?}", - self.tokenizer.current_token - ); - } - self.tokenizer.next_token(); - gender - } - - fn parse_name(&mut self, level: u8) -> Name { - let mut name = Name::default(); - name.value = Some(self.take_line_value()); - let mut cur_level = level; - - loop { - if let 
Token::Level(new_level) = self.tokenizer.current_token { - if new_level <= cur_level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "GIVN" => name.given = Some(self.take_line_value()), - "NPFX" => name.prefix = Some(self.take_line_value()), - "NSFX" => name.suffix = Some(self.take_line_value()), - "SPFX" => name.surname_prefix = Some(self.take_line_value()), - "SURN" => name.surname = Some(self.take_line_value()), - _ => self.skip_current_tag(cur_level, "Name"), - }, - Token::Level(_) => { - cur_level += 1; - self.tokenizer.next_token() - } - _ => panic!("Unhandled Name Token: {:?}", self.tokenizer.current_token), - } - } - - name - } - - fn parse_event(&mut self, level: u8) -> Event { - match Event::parse(self, level) { - Ok(event) => event, - Err(e) => panic!("event parsing fail: {:?}", e), - } - } - /// Parses ADDR tag fn parse_address(&mut self, level: u8) -> Address { match Address::parse(self, level) { @@ -405,10 +290,7 @@ impl<'a> Parser<'a> { value.push('\n'); value.push_str(&self.take_line_value()) } - "CONC" => { - value.push(' '); - value.push_str(&self.take_line_value()) - } + "CONC" => value.push_str(&self.take_line_value()), _ => panic!("{} Unhandled Continuation Tag: {}", self.dbg(), tag), }, Token::Level(_) => self.tokenizer.next_token(), @@ -440,17 +322,18 @@ impl<'a> Parser<'a> { value } - pub(crate) fn skip_current_tag(&mut self, level: u8, parent_name: &str) { + pub(crate) fn take_tag(&mut self) -> &str { if let Token::Tag(tag) = &self.tokenizer.current_token { - println!( - "{} Unhandled {} Tag: {}", - self.dbg_lvl(level), - parent_name, - tag - ); + tag } else { - panic!("Expected tag, found {:?}", &self.tokenizer.current_token); + panic!("Expected tag, found {:?}", &self.tokenizer.current_token) } + } + + pub(crate) fn skip_current_tag(&mut self, level: u8, parent_name: &str) { + let dbg = self.dbg_lvl(level); + let tag = self.take_tag(); + println!("{} Unhandled {} Tag: {}", dbg, 
parent_name, tag); self.skip_block(level); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 6dad6c7..7dfc354 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -76,7 +76,6 @@ impl<'a> Tokenizer<'a> { // handle tag with trailing whitespace if self.current_char == '\n' { - // println!("line {}: trailing whitespace {:?}", self.line, self.current_token); self.next_token(); return; } diff --git a/src/types/event.rs b/src/types/event.rs index 830d96a..3090c7b 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -140,7 +140,6 @@ impl Parsable for Event { // Events begin with either EVEN , or a type tag. let type_tag: &str = if tag == "EVEN" { - println!("{:?}", &parser.tokenizer.current_token); if let Token::LineValue(v) = &parser.tokenizer.current_token { v } else { diff --git a/src/types/family_link.rs b/src/types/family_link.rs index 7a847cb..21b9f23 100644 --- a/src/types/family_link.rs +++ b/src/types/family_link.rs @@ -1,3 +1,6 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -44,3 +47,34 @@ impl FamilyLink { }; } } + +impl Parsable for FamilyLink { + fn parse(parser: &mut Parser, level: u8) -> Result { + // TODO: parser.take_tag() + let tag = parser.take_tag(); + let relation = match tag { + "FAMC" => Relation::Child, + "FAMS" => Relation::Spouse, + _ => panic!("Unrecognized family type tag: {}", tag), + }; + let mut link = FamilyLink(parser.take_line_value(), relation, None); + + loop { + if let Token::Level(cur_level) = parser.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "PEDI" => link.set_pedigree(parser.take_line_value().as_str()), + _ => parser.skip_current_tag(level + 1, "FamilyLink"), + }, + Token::Level(_) => parser.tokenizer.next_token(), + _ => parser.handle_unexpected_token(level + 1, "FamilyLink"), + } + } + + Ok(link) + } 
+} diff --git a/src/types/individual.rs b/src/types/individual.rs index bbdbcc0..3bce939 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,11 +1,15 @@ +use crate::parser::{Parsable, Parser, ParsingError}; +use crate::tokenizer::Token; use crate::types::{event::HasEvents, CustomData, Event, FamilyLink}; +use std::default::Default; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; type Xref = String; /// A Person within the family tree -#[derive(Debug)] +#[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Individual { pub xref: Option, @@ -18,19 +22,6 @@ pub struct Individual { } impl Individual { - #[must_use] - pub fn new(xref: Option) -> Individual { - Individual { - xref, - name: None, - sex: Gender::Unknown, - events: Vec::new(), - families: Vec::new(), - custom_data: Vec::new(), - last_updated: None, - } - } - pub fn add_family(&mut self, link: FamilyLink) { let mut do_add = true; let xref = &link.0; @@ -58,6 +49,50 @@ impl HasEvents for Individual { } } +impl Parsable for Individual { + /// Parses INDI top-level tag + fn parse(parser: &mut Parser, level: u8) -> Result { + // skip over INDI tag name + parser.tokenizer.next_token(); + let mut individual = Individual::default(); + + while parser.tokenizer.current_token != Token::Level(level) { + match &parser.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "NAME" => individual.name = Some(Name::parse(parser, level + 1).unwrap()), + "SEX" => individual.sex = Gender::parse(parser, level + 1).unwrap(), + "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" + | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" + | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { + individual.add_event(Event::parse(parser, level + 1).unwrap()); + } + "FAMC" | "FAMS" => { + individual.add_family(FamilyLink::parse(parser, level + 1).unwrap()) + } + "CHAN" => { + // 
assuming it always only has a single DATE subtag + parser.tokenizer.next_token(); // level + parser.tokenizer.next_token(); // DATE tag + individual.last_updated = Some(parser.take_line_value()); + } + _ => parser.skip_current_tag(level + 1, "Individual"), + }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + individual.add_custom_data(parser.parse_custom_tag(tag_clone)) + } + Token::Level(_) => parser.tokenizer.next_token(), + _ => panic!( + "Unhandled Individual Token: {:?}", + parser.tokenizer.current_token + ), + } + } + + Ok(individual) + } +} + /// Gender of an `Individual` #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -69,6 +104,36 @@ pub enum Gender { Unknown, } +impl Default for Gender { + fn default() -> Gender { + Gender::Unknown + } +} + +impl Parsable for Gender { + fn parse(parser: &mut Parser, _level: u8) -> Result { + parser.tokenizer.next_token(); + let gender: Gender; + if let Token::LineValue(gender_string) = &parser.tokenizer.current_token { + gender = match gender_string.as_str() { + "M" => Gender::Male, + "F" => Gender::Female, + "N" => Gender::Nonbinary, + "U" => Gender::Unknown, + _ => panic!("{} Unknown gender value {}", parser.dbg(), gender_string), + }; + } else { + panic!( + "Expected gender LineValue, found {:?}", + parser.tokenizer.current_token + ); + } + parser.tokenizer.next_token(); + + Ok(gender) + } +} + #[derive(Debug, Default, PartialEq)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Name { @@ -79,3 +144,36 @@ pub struct Name { pub surname_prefix: Option, pub suffix: Option, } + +impl Parsable for Name { + fn parse(parser: &mut Parser, level: u8) -> Result { + let mut name = Name::default(); + name.value = Some(parser.take_line_value()); + let mut cur_level = level; + + loop { + if let Token::Level(new_level) = parser.tokenizer.current_token { + if new_level <= cur_level { + break; + } + } + match &parser.tokenizer.current_token { + Token::Tag(tag) => 
match tag.as_str() { + "GIVN" => name.given = Some(parser.take_line_value()), + "NPFX" => name.prefix = Some(parser.take_line_value()), + "NSFX" => name.suffix = Some(parser.take_line_value()), + "SPFX" => name.surname_prefix = Some(parser.take_line_value()), + "SURN" => name.surname = Some(parser.take_line_value()), + _ => parser.skip_current_tag(cur_level, "Name"), + }, + Token::Level(_) => { + cur_level += 1; + parser.tokenizer.next_token() + } + _ => panic!("Unhandled Name Token: {:?}", parser.tokenizer.current_token), + } + } + + Ok(name) + } +} From 644736b1029aa80eaccd4b220f0f0d200fab03e7 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 12:27:22 -0700 Subject: [PATCH 15/55] use take_tag & it parses CustomTag strs --- src/parser.rs | 31 +++++++++++++++---------------- src/types/event.rs | 10 +--------- src/types/family_link.rs | 1 - src/types/individual.rs | 5 +---- 4 files changed, 17 insertions(+), 30 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index efe12b0..24ae358 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -72,10 +72,8 @@ impl<'a> Parser<'a> { self.skip_block(level) } }; - } else if let Token::CustomTag(tag) = &self.tokenizer.current_token { - // TODO - let tag_clone = tag.clone(); - let custom_data = self.parse_custom_tag(tag_clone); + } else if let Token::CustomTag(_) = &self.tokenizer.current_token { + let custom_data = self.parse_custom_tag(); println!( "{} Skipping top-level custom tag: {:?}", self.dbg(), @@ -133,12 +131,13 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), // TODO: cleanup to just use parse_event - // "EVEN" => { - // let events_recorded = self.take_line_value(); - // let mut event = self.parse_event(level + 2); - // event.with_source_data(events_recorded); - // source.data.add_event(event); - // } + "EVEN" => { + panic!("{}, here!", self.dbg_lvl(level + 1)); + // let events_recorded = self.take_line_value(); + // let mut event = 
self.parse_event(level + 2); + // event.with_source_data(events_recorded); + // source.data.add_event(event); + } "AGNC" => source.data.agency = Some(self.take_line_value()), "ABBR" => source.abbreviation = Some(self.take_continued_text(level + 1)), "TITL" => source.title = Some(self.take_continued_text(level + 1)), @@ -182,8 +181,9 @@ impl<'a> Parser<'a> { repo } - pub(crate) fn parse_custom_tag(&mut self, tag: String) -> CustomData { - let value = self.take_line_value(); + pub(crate) fn parse_custom_tag(&mut self) -> CustomData { + let tag: String = self.take_tag().into(); + let value: String = self.take_line_value(); CustomData { tag, value } } @@ -323,10 +323,9 @@ impl<'a> Parser<'a> { } pub(crate) fn take_tag(&mut self) -> &str { - if let Token::Tag(tag) = &self.tokenizer.current_token { - tag - } else { - panic!("Expected tag, found {:?}", &self.tokenizer.current_token) + match &self.tokenizer.current_token { + Token::Tag(tag) | Token::CustomTag(tag) => tag, + _ => panic!("Expected tag, found {:?}", &self.tokenizer.current_token), } } diff --git a/src/types/event.rs b/src/types/event.rs index 3090c7b..763db19 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -128,15 +128,7 @@ impl Event { impl Parsable for Event { fn parse(parser: &mut Parser, level: u8) -> Result { // extract current tag name to determine event type. - let event_tag_token = parser.tokenizer.take_token(); - let tag: &str = if let Token::Tag(t) = &event_tag_token { - t.as_str().clone() - } else { - panic!( - "Expected event tag, found {:?}", - &parser.tokenizer.current_token - ); - }; + let tag: &str = parser.take_tag(); // Events begin with either EVEN , or a type tag. 
let type_tag: &str = if tag == "EVEN" { diff --git a/src/types/family_link.rs b/src/types/family_link.rs index 21b9f23..7319467 100644 --- a/src/types/family_link.rs +++ b/src/types/family_link.rs @@ -50,7 +50,6 @@ impl FamilyLink { impl Parsable for FamilyLink { fn parse(parser: &mut Parser, level: u8) -> Result { - // TODO: parser.take_tag() let tag = parser.take_tag(); let relation = match tag { "FAMC" => Relation::Child, diff --git a/src/types/individual.rs b/src/types/individual.rs index 3bce939..b630594 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -77,10 +77,7 @@ impl Parsable for Individual { } _ => parser.skip_current_tag(level + 1, "Individual"), }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - individual.add_custom_data(parser.parse_custom_tag(tag_clone)) - } + Token::CustomTag(_) => individual.add_custom_data(parser.parse_custom_tag()), Token::Level(_) => parser.tokenizer.next_token(), _ => panic!( "Unhandled Individual Token: {:?}", From 4ed9780efbcecc28c39c036943ea6fa77348703a Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 12:55:11 -0700 Subject: [PATCH 16/55] support event descriptors --- src/parser.rs | 26 +++++++------------------- src/types/event.rs | 43 +++++++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 24ae358..231560a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,7 +4,7 @@ use std::{error::Error, fmt, panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::Gedcom; use crate::types::{ - Address, CustomData, Family, Header, Individual, RepoCitation, Repository, Source, + Address, CustomData, Event, Family, Header, Individual, RepoCitation, Repository, Source, SourceCitation, Submitter, }; @@ -104,7 +104,7 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "NAME" => submitter.name = Some(self.take_line_value()), "ADDR" => { - submitter.address = 
Some(self.parse_address(level + 1)); + submitter.address = Some(Address::parse(self, level + 1).unwrap()); } "PHON" => submitter.phone = Some(self.take_line_value()), _ => self.skip_current_tag(level + 1, "Submitter"), @@ -131,13 +131,9 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), // TODO: cleanup to just use parse_event - "EVEN" => { - panic!("{}, here!", self.dbg_lvl(level + 1)); - // let events_recorded = self.take_line_value(); - // let mut event = self.parse_event(level + 2); - // event.with_source_data(events_recorded); - // source.data.add_event(event); - } + "EVEN" => source + .data + .add_event(Event::parse(self, level + 1).unwrap()), "AGNC" => source.data.agency = Some(self.take_line_value()), "ABBR" => source.abbreviation = Some(self.take_continued_text(level + 1)), "TITL" => source.title = Some(self.take_continued_text(level + 1)), @@ -170,7 +166,7 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "NAME" => repo.name = Some(self.take_line_value()), - "ADDR" => repo.address = Some(self.parse_address(level + 1)), + "ADDR" => repo.address = Some(Address::parse(self, level + 1).unwrap()), _ => self.skip_current_tag(level + 1, "Repository"), }, Token::Level(_) => self.tokenizer.next_token(), @@ -241,14 +237,6 @@ impl<'a> Parser<'a> { citation } - /// Parses ADDR tag - fn parse_address(&mut self, level: u8) -> Address { - match Address::parse(self, level) { - Ok(addr) => addr, - Err(e) => panic!("address fail: {:?}", e), - } - } - // TODO Citation::parse pub(crate) fn parse_citation(&mut self, level: u8) -> SourceCitation { let mut citation = SourceCitation { @@ -363,7 +351,7 @@ impl<'a> Parser<'a> { /// Debug function displaying GEDCOM line number of error message. 
pub(crate) fn dbg(&self) -> String { - format!("line {}:", self.tokenizer.line) + format!("line {}:", &self.tokenizer.line) } } diff --git a/src/types/event.rs b/src/types/event.rs index 763db19..1f7cf99 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -4,7 +4,9 @@ use crate::types::SourceCitation; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use std::{fmt, string::ToString}; +use std::default::Default; +use std::fmt; +use std::string::ToString; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, PartialEq)] @@ -83,12 +85,19 @@ impl EventType { } } +impl Default for EventType { + fn default() -> EventType { + EventType::Other + } +} + /// Event fact -#[derive(Clone)] +#[derive(Default, Clone)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Event { pub event: EventType, pub date: Option, + pub descriptor: Option, pub place: Option, pub citations: Vec, } @@ -96,12 +105,9 @@ pub struct Event { impl Event { #[must_use] pub fn new(etype: EventType) -> Event { - Event { - event: etype, - date: None, - place: None, - citations: Vec::new(), - } + let mut event = Event::default(); + event.event = etype; + event } /** converts an event to be of type `SourceData` with `value` as the data */ @@ -161,18 +167,27 @@ impl Parsable for Event { _ => parser.skip_current_tag(level + 1, "Event"), }, Token::Level(_) => parser.tokenizer.next_token(), - // some events are also bool-like w/ Y values, apparently? Token::LineValue(v) => { - // TODO: return error and stop using dbg - if v.as_str() != "Y" { - panic!("{} Surprise value {} as event value", parser.dbg(), v); + // some events have bool-like descriptor like "Y", apparently? just skip those? + if v.as_str() == "Y" { + parser.tokenizer.next_token(); + } else { + // TODO: handle comma-delimited event types... 
+ // just setting as descriptor for now + + event.descriptor = Some(v.into()); + println!( + "{} Using event descriptor: {:?}", + parser.dbg(), + &event.descriptor + ); + parser.tokenizer.next_token(); } - // just skip Y's - parser.tokenizer.next_token(); } _ => parser.handle_unexpected_token(level + 1, "Event"), } } + Ok(event) } } From 04fe045da884d1393e0a63b5c701ca3399cc5750 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 13:28:21 -0700 Subject: [PATCH 17/55] set & get level directly from parser --- src/parser.rs | 96 +++++++++++++++++++++++----------------- src/types/address.rs | 9 ++-- src/types/event.rs | 13 +++--- src/types/family.rs | 13 +++--- src/types/family_link.rs | 11 ++--- src/types/header.rs | 7 +-- src/types/individual.rs | 34 +++++++------- 7 files changed, 100 insertions(+), 83 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 231560a..871699d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -11,6 +11,7 @@ use crate::types::{ /// The Gedcom parser that converts the token list into a data structure pub struct Parser<'a> { pub(crate) tokenizer: Tokenizer<'a>, + pub(crate) level: u8, } // TODO: expose useful helpers without publicizing tokenizer @@ -22,7 +23,10 @@ impl<'a> Parser<'a> { let mut tokenizer = Tokenizer::new(chars); if tokenizer.current_token == Token::None { tokenizer.next_token(); - Parser { tokenizer } + Parser { + tokenizer, + level: 0, + } } else { panic!( "Unexpected starting token, found {:?}", @@ -31,11 +35,24 @@ impl<'a> Parser<'a> { } } + pub(crate) fn set_level(&mut self) { + if let Token::Level(lvl) = self.tokenizer.current_token { + self.level = lvl; + self.tokenizer.next_token(); + } else { + panic!( + "{} Expected Level, found {:?}", + self.dbg(), + &self.tokenizer.current_token + ); + } + } + /// Does the actual parsing of the record. 
pub fn parse_record(&mut self) -> Gedcom { let mut data = Gedcom::default(); loop { - let level = match self.tokenizer.current_token { + self.level = match self.tokenizer.current_token { Token::Level(n) => n, _ => panic!( "{} Expected Level, found {:?}", @@ -54,22 +71,20 @@ impl<'a> Parser<'a> { if let Token::Tag(tag) = &self.tokenizer.current_token { match tag.as_str() { - "HEAD" => data.header = Header::parse(self, 0).unwrap(), - "FAM" => { - data.add_family(Family::parse(self, level).unwrap().with_xref(pointer)) - } + "HEAD" => data.header = Header::parse(self).unwrap(), + "FAM" => data.add_family(Family::parse(self).unwrap().with_xref(pointer)), "INDI" => { - let mut individual = Individual::parse(self, level).unwrap(); + let mut individual = Individual::parse(self).unwrap(); individual.xref = pointer; data.add_individual(individual); } - "REPO" => data.add_repository(self.parse_repository(level, pointer)), - "SOUR" => data.add_source(self.parse_source(level, pointer)), - "SUBM" => data.add_submitter(self.parse_submitter(level, pointer)), + "REPO" => data.add_repository(self.parse_repository(pointer)), + "SOUR" => data.add_source(self.parse_source(pointer)), + "SUBM" => data.add_submitter(self.parse_submitter(pointer)), "TRLR" => break, _ => { println!("{} Unhandled top-level data {}", self.dbg(), tag); - self.skip_block(level) + self.skip_block(self.level) } }; } else if let Token::CustomTag(_) = &self.tokenizer.current_token { @@ -79,7 +94,7 @@ impl<'a> Parser<'a> { self.dbg(), custom_data ); - self.skip_block(level); + self.skip_block(self.level); } else { println!( "{} Unhandled token {:?}", @@ -94,36 +109,38 @@ impl<'a> Parser<'a> { } /// Parses SUBM top-level tag - fn parse_submitter(&mut self, level: u8, xref: Option) -> Submitter { + fn parse_submitter(&mut self, xref: Option) -> Submitter { + let base_lvl = self.level; // skip over SUBM tag name self.tokenizer.next_token(); let mut submitter = Submitter::new(xref); - while self.tokenizer.current_token 
!= Token::Level(level) { + while self.tokenizer.current_token != Token::Level(base_lvl) { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "NAME" => submitter.name = Some(self.take_line_value()), "ADDR" => { - submitter.address = Some(Address::parse(self, level + 1).unwrap()); + submitter.address = Some(Address::parse(self).unwrap()); } "PHON" => submitter.phone = Some(self.take_line_value()), - _ => self.skip_current_tag(level + 1, "Submitter"), + _ => self.skip_current_tag(self.level, "Submitter"), }, - Token::Level(_) => self.tokenizer.next_token(), - _ => self.handle_unexpected_token(level + 1, "SUBM"), + Token::Level(_) => self.set_level(), + _ => self.handle_unexpected_token(self.level, "SUBM"), } } submitter } - fn parse_source(&mut self, level: u8, xref: Option) -> Source { + fn parse_source(&mut self, xref: Option) -> Source { + let base_lvl = self.level; // skip SOUR tag self.tokenizer.next_token(); let mut source = Source::new(xref); loop { if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } @@ -131,17 +148,15 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), // TODO: cleanup to just use parse_event - "EVEN" => source - .data - .add_event(Event::parse(self, level + 1).unwrap()), + "EVEN" => source.data.add_event(Event::parse(self).unwrap()), "AGNC" => source.data.agency = Some(self.take_line_value()), - "ABBR" => source.abbreviation = Some(self.take_continued_text(level + 1)), - "TITL" => source.title = Some(self.take_continued_text(level + 1)), - "REPO" => source.add_repo_citation(self.parse_repo_citation(level + 1)), - _ => self.skip_current_tag(level + 1, "Source"), + "ABBR" => source.abbreviation = Some(self.take_continued_text(self.level)), + "TITL" => source.title = Some(self.take_continued_text(self.level)), + "REPO" => source.add_repo_citation(self.parse_repo_citation(self.level)), + _ => 
self.skip_current_tag(self.level, "Source"), }, - Token::Level(_) => self.tokenizer.next_token(), - _ => self.handle_unexpected_token(level + 1, "SOUR"), + Token::Level(_) => self.set_level(), + _ => self.handle_unexpected_token(self.level, "SOUR"), } } @@ -149,7 +164,8 @@ impl<'a> Parser<'a> { } /// Parses REPO top-level tag. - fn parse_repository(&mut self, level: u8, xref: Option) -> Repository { + fn parse_repository(&mut self, xref: Option) -> Repository { + let base_lvl = self.level; // skip REPO tag self.tokenizer.next_token(); let mut repo = Repository { @@ -159,18 +175,18 @@ impl<'a> Parser<'a> { }; loop { if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "NAME" => repo.name = Some(self.take_line_value()), - "ADDR" => repo.address = Some(Address::parse(self, level + 1).unwrap()), - _ => self.skip_current_tag(level + 1, "Repository"), + "ADDR" => repo.address = Some(Address::parse(self).unwrap()), + _ => self.skip_current_tag(self.level, "Repository"), }, - Token::Level(_) => self.tokenizer.next_token(), - _ => self.handle_unexpected_token(level + 1, "REPO"), + Token::Level(_) => self.set_level(), + _ => self.handle_unexpected_token(self.level, "REPO"), } } @@ -203,7 +219,7 @@ impl<'a> Parser<'a> { } _ => panic!("{} Unhandled GEDC Tag: {}", self.dbg(), tag), }, - Token::Level(_) => self.tokenizer.next_token(), + Token::Level(_) => self.set_level(), _ => self.handle_unexpected_token(2, "GEDC"), } } @@ -227,7 +243,7 @@ impl<'a> Parser<'a> { "CALN" => citation.call_number = Some(self.take_line_value()), _ => panic!("{} Unhandled RepoCitation Tag: {}", self.dbg(), tag), }, - Token::Level(_) => self.tokenizer.next_token(), + Token::Level(_) => self.set_level(), _ => panic!( "Unhandled RepoCitation Token: {:?}", self.tokenizer.current_token @@ -254,7 +270,7 @@ impl<'a> Parser<'a> { "PAGE" => citation.page = 
Some(self.take_line_value()), _ => self.skip_current_tag(level + 1, "Citation"), }, - Token::Level(_) => self.tokenizer.next_token(), + Token::Level(_) => self.set_level(), _ => self.handle_unexpected_token(level + 1, "Citation"), } } @@ -281,7 +297,7 @@ impl<'a> Parser<'a> { "CONC" => value.push_str(&self.take_line_value()), _ => panic!("{} Unhandled Continuation Tag: {}", self.dbg(), tag), }, - Token::Level(_) => self.tokenizer.next_token(), + Token::Level(_) => self.set_level(), _ => panic!( "Unhandled Continuation Token: {:?}", self.tokenizer.current_token @@ -362,7 +378,7 @@ pub trait Parsable { /// /// # Errors /// Raises a `ParsingError` when unhandled or unexpected tokens are found. - fn parse(parser: &mut Parser, level: u8) -> Result; + fn parse(parser: &mut Parser) -> Result; } #[derive(Debug)] diff --git a/src/types/address.rs b/src/types/address.rs index 2ae436b..d1a4307 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -37,7 +37,8 @@ impl fmt::Display for Address { } impl Parsable
for Address { - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { + let base_lvl = parser.level; // skip ADDR tag if let Token::Tag(_) = &parser.tokenizer.current_token { parser.tokenizer.next_token(); @@ -54,7 +55,7 @@ impl Parsable
for Address { loop { if let Token::Level(cur_level) = parser.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } @@ -72,9 +73,9 @@ impl Parsable
for Address { "POST" => address.post = Some(parser.take_line_value()), "CTRY" => address.country = Some(parser.take_line_value()), // TODO ParsingError - _ => parser.skip_current_tag(level + 1, "Address"), + _ => parser.skip_current_tag(parser.level, "Address"), }, - Token::Level(_) => parser.tokenizer.next_token(), + Token::Level(_) => parser.set_level(), _ => panic!( "Unhandled Address Token: {:?}", parser.tokenizer.current_token diff --git a/src/types/event.rs b/src/types/event.rs index 1f7cf99..59752a2 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -132,7 +132,8 @@ impl Event { } impl Parsable for Event { - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { + let base_lvl = parser.level; // extract current tag name to determine event type. let tag: &str = parser.take_tag(); @@ -154,7 +155,7 @@ impl Parsable for Event { loop { if let Token::Level(cur_level) = parser.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } @@ -163,10 +164,10 @@ impl Parsable for Event { "DATE" => event.date = Some(parser.take_line_value()), "PLAC" => event.place = Some(parser.take_line_value()), // TODO Citation::parse - "SOUR" => event.add_citation(parser.parse_citation(level + 1)), - _ => parser.skip_current_tag(level + 1, "Event"), + "SOUR" => event.add_citation(parser.parse_citation(parser.level)), + _ => parser.skip_current_tag(parser.level, "Event"), }, - Token::Level(_) => parser.tokenizer.next_token(), + Token::Level(_) => parser.set_level(), Token::LineValue(v) => { // some events have bool-like descriptor like "Y", apparently? just skip those? 
if v.as_str() == "Y" { @@ -184,7 +185,7 @@ impl Parsable for Event { parser.tokenizer.next_token(); } } - _ => parser.handle_unexpected_token(level + 1, "Event"), + _ => parser.handle_unexpected_token(parser.level, "Event"), } } diff --git a/src/types/family.rs b/src/types/family.rs index 1e2954b..353421e 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -61,22 +61,23 @@ impl HasEvents for Family { impl Parsable for Family { /// Parses FAM top-level tag - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { + let base_lvl = parser.level; // skip over FAM tag name parser.tokenizer.next_token(); let mut family = Family::default(); - while parser.tokenizer.current_token != Token::Level(level) { + while parser.tokenizer.current_token != Token::Level(base_lvl) { match &parser.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MARR" => family.add_event(Event::parse(parser, level + 1).unwrap()), + "MARR" => family.add_event(Event::parse(parser).unwrap()), "HUSB" => family.set_individual1(parser.take_line_value()), "WIFE" => family.set_individual2(parser.take_line_value()), "CHIL" => family.add_child(parser.take_line_value()), - _ => parser.skip_current_tag(level + 1, "Family"), + _ => parser.skip_current_tag(parser.level, "Family"), }, - Token::Level(_) => parser.tokenizer.next_token(), - _ => parser.handle_unexpected_token(level + 1, "FAM"), + Token::Level(_) => parser.set_level(), + _ => parser.handle_unexpected_token(parser.level, "FAM"), } } diff --git a/src/types/family_link.rs b/src/types/family_link.rs index 7319467..00574f9 100644 --- a/src/types/family_link.rs +++ b/src/types/family_link.rs @@ -49,7 +49,8 @@ impl FamilyLink { } impl Parsable for FamilyLink { - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { + let base_lvl = parser.level; let tag = parser.take_tag(); let relation = match tag { "FAMC" => Relation::Child, @@ -60,17 +61,17 @@ 
impl Parsable for FamilyLink { loop { if let Token::Level(cur_level) = parser.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } match &parser.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PEDI" => link.set_pedigree(parser.take_line_value().as_str()), - _ => parser.skip_current_tag(level + 1, "FamilyLink"), + _ => parser.skip_current_tag(parser.level, "FamilyLink"), }, - Token::Level(_) => parser.tokenizer.next_token(), - _ => parser.handle_unexpected_token(level + 1, "FamilyLink"), + Token::Level(_) => parser.set_level(), + _ => parser.handle_unexpected_token(parser.level, "FamilyLink"), } } diff --git a/src/types/header.rs b/src/types/header.rs index 6e4205c..743f427 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -41,14 +41,15 @@ impl Header { impl Parsable
for Header { /// Parses HEAD top-level tag - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { + let base_lvl = parser.level; // skip over HEAD tag name parser.tokenizer.next_token(); let mut header = Header::default(); // just skipping the header for now - while parser.tokenizer.current_token != Token::Level(level) { + while parser.tokenizer.current_token != Token::Level(base_lvl) { match &parser.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { // TODO: CHAR.VERS - version @@ -87,7 +88,7 @@ impl Parsable
for Header { } _ => parser.skip_current_tag(1, "Header"), }, - Token::Level(_) => parser.tokenizer.next_token(), + Token::Level(_) => parser.set_level(), _ => parser.handle_unexpected_token(1, "HEAD"), } } diff --git a/src/types/individual.rs b/src/types/individual.rs index b630594..9c2ab85 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -51,34 +51,33 @@ impl HasEvents for Individual { impl Parsable for Individual { /// Parses INDI top-level tag - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { // skip over INDI tag name parser.tokenizer.next_token(); let mut individual = Individual::default(); + let base_lvl = parser.level; - while parser.tokenizer.current_token != Token::Level(level) { + while parser.tokenizer.current_token != Token::Level(base_lvl) { match &parser.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "NAME" => individual.name = Some(Name::parse(parser, level + 1).unwrap()), - "SEX" => individual.sex = Gender::parse(parser, level + 1).unwrap(), + "NAME" => individual.name = Some(Name::parse(parser).unwrap()), + "SEX" => individual.sex = Gender::parse(parser).unwrap(), "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { - individual.add_event(Event::parse(parser, level + 1).unwrap()); - } - "FAMC" | "FAMS" => { - individual.add_family(FamilyLink::parse(parser, level + 1).unwrap()) + individual.add_event(Event::parse(parser).unwrap()); } + "FAMC" | "FAMS" => individual.add_family(FamilyLink::parse(parser).unwrap()), "CHAN" => { // assuming it always only has a single DATE subtag parser.tokenizer.next_token(); // level parser.tokenizer.next_token(); // DATE tag individual.last_updated = Some(parser.take_line_value()); } - _ => parser.skip_current_tag(level + 1, "Individual"), + _ => 
parser.skip_current_tag(parser.level, "Individual"), }, Token::CustomTag(_) => individual.add_custom_data(parser.parse_custom_tag()), - Token::Level(_) => parser.tokenizer.next_token(), + Token::Level(_) => parser.set_level(), _ => panic!( "Unhandled Individual Token: {:?}", parser.tokenizer.current_token @@ -108,7 +107,7 @@ impl Default for Gender { } impl Parsable for Gender { - fn parse(parser: &mut Parser, _level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { parser.tokenizer.next_token(); let gender: Gender; if let Token::LineValue(gender_string) = &parser.tokenizer.current_token { @@ -143,14 +142,14 @@ pub struct Name { } impl Parsable for Name { - fn parse(parser: &mut Parser, level: u8) -> Result { + fn parse(parser: &mut Parser) -> Result { let mut name = Name::default(); name.value = Some(parser.take_line_value()); - let mut cur_level = level; + let base_lvl = parser.level; loop { if let Token::Level(new_level) = parser.tokenizer.current_token { - if new_level <= cur_level { + if new_level <= base_lvl { break; } } @@ -161,12 +160,9 @@ impl Parsable for Name { "NSFX" => name.suffix = Some(parser.take_line_value()), "SPFX" => name.surname_prefix = Some(parser.take_line_value()), "SURN" => name.surname = Some(parser.take_line_value()), - _ => parser.skip_current_tag(cur_level, "Name"), + _ => parser.skip_current_tag(parser.level, "Name"), }, - Token::Level(_) => { - cur_level += 1; - parser.tokenizer.next_token() - } + Token::Level(_) => parser.set_level(), _ => panic!("Unhandled Name Token: {:?}", parser.tokenizer.current_token), } } From 7bdb0bd84cffbfe93f08efc742a5ea920dd7a25f Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 13:31:59 -0700 Subject: [PATCH 18/55] dbg() outputs level & skip_block requires no args --- src/parser.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 871699d..ef62441 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ 
-84,7 +84,7 @@ impl<'a> Parser<'a> { "TRLR" => break, _ => { println!("{} Unhandled top-level data {}", self.dbg(), tag); - self.skip_block(self.level) + self.skip_block() } }; } else if let Token::CustomTag(_) = &self.tokenizer.current_token { @@ -94,7 +94,7 @@ impl<'a> Parser<'a> { self.dbg(), custom_data ); - self.skip_block(self.level); + self.skip_block(); } else { println!( "{} Unhandled token {:?}", @@ -334,26 +334,27 @@ impl<'a> Parser<'a> { } pub(crate) fn skip_current_tag(&mut self, level: u8, parent_name: &str) { - let dbg = self.dbg_lvl(level); + let dbg = self.dbg(); let tag = self.take_tag(); println!("{} Unhandled {} Tag: {}", dbg, parent_name, tag); - self.skip_block(level); + self.skip_block(); } pub(crate) fn handle_unexpected_token(&mut self, level: u8, base_tag: &str) { println!( "{} Unhandled {} Token: {:?}", - self.dbg_lvl(level), + self.dbg(), base_tag, &self.tokenizer.current_token ); - self.skip_block(level); + self.skip_block(); } - pub(crate) fn skip_block(&mut self, level: u8) { + pub(crate) fn skip_block(&mut self) { + let block_level = self.level; loop { if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= block_level { break; } } @@ -361,13 +362,9 @@ impl<'a> Parser<'a> { } } - fn dbg_lvl(&self, level: u8) -> String { - format!("line {}, level {}:", self.tokenizer.line, level) - } - /// Debug function displaying GEDCOM line number of error message. 
pub(crate) fn dbg(&self) -> String { - format!("line {}:", &self.tokenizer.line) + format!("line {}, level {} :", &self.tokenizer.line, &self.level) } } From 592f8527abf6f63b421b8bd80c23accc4199d5d3 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 13:35:32 -0700 Subject: [PATCH 19/55] unhandled block skippers don't take level arg --- src/parser.rs | 22 +++++++++++----------- src/types/address.rs | 2 +- src/types/event.rs | 4 ++-- src/types/family.rs | 4 ++-- src/types/family_link.rs | 4 ++-- src/types/header.rs | 4 ++-- src/types/individual.rs | 4 ++-- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index ef62441..77140b3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -123,10 +123,10 @@ impl<'a> Parser<'a> { submitter.address = Some(Address::parse(self).unwrap()); } "PHON" => submitter.phone = Some(self.take_line_value()), - _ => self.skip_current_tag(self.level, "Submitter"), + _ => self.skip_current_tag("Submitter"), }, Token::Level(_) => self.set_level(), - _ => self.handle_unexpected_token(self.level, "SUBM"), + _ => self.handle_unexpected_token("SUBM"), } } submitter @@ -153,10 +153,10 @@ impl<'a> Parser<'a> { "ABBR" => source.abbreviation = Some(self.take_continued_text(self.level)), "TITL" => source.title = Some(self.take_continued_text(self.level)), "REPO" => source.add_repo_citation(self.parse_repo_citation(self.level)), - _ => self.skip_current_tag(self.level, "Source"), + _ => self.skip_current_tag("Source"), }, Token::Level(_) => self.set_level(), - _ => self.handle_unexpected_token(self.level, "SOUR"), + _ => self.handle_unexpected_token("SOUR"), } } @@ -183,10 +183,10 @@ impl<'a> Parser<'a> { Token::Tag(tag) => match tag.as_str() { "NAME" => repo.name = Some(self.take_line_value()), "ADDR" => repo.address = Some(Address::parse(self).unwrap()), - _ => self.skip_current_tag(self.level, "Repository"), + _ => self.skip_current_tag("Repository"), }, Token::Level(_) => self.set_level(), - _ 
=> self.handle_unexpected_token(self.level, "REPO"), + _ => self.handle_unexpected_token("REPO"), } } @@ -220,7 +220,7 @@ impl<'a> Parser<'a> { _ => panic!("{} Unhandled GEDC Tag: {}", self.dbg(), tag), }, Token::Level(_) => self.set_level(), - _ => self.handle_unexpected_token(2, "GEDC"), + _ => self.handle_unexpected_token("GEDC"), } } header @@ -268,10 +268,10 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PAGE" => citation.page = Some(self.take_line_value()), - _ => self.skip_current_tag(level + 1, "Citation"), + _ => self.skip_current_tag("Citation"), }, Token::Level(_) => self.set_level(), - _ => self.handle_unexpected_token(level + 1, "Citation"), + _ => self.handle_unexpected_token("Citation"), } } citation @@ -333,14 +333,14 @@ impl<'a> Parser<'a> { } } - pub(crate) fn skip_current_tag(&mut self, level: u8, parent_name: &str) { + pub(crate) fn skip_current_tag(&mut self, parent_name: &str) { let dbg = self.dbg(); let tag = self.take_tag(); println!("{} Unhandled {} Tag: {}", dbg, parent_name, tag); self.skip_block(); } - pub(crate) fn handle_unexpected_token(&mut self, level: u8, base_tag: &str) { + pub(crate) fn handle_unexpected_token(&mut self, base_tag: &str) { println!( "{} Unhandled {} Token: {:?}", self.dbg(), diff --git a/src/types/address.rs b/src/types/address.rs index d1a4307..5436aa5 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -73,7 +73,7 @@ impl Parsable
for Address { "POST" => address.post = Some(parser.take_line_value()), "CTRY" => address.country = Some(parser.take_line_value()), // TODO ParsingError - _ => parser.skip_current_tag(parser.level, "Address"), + _ => parser.skip_current_tag("Address"), }, Token::Level(_) => parser.set_level(), _ => panic!( diff --git a/src/types/event.rs b/src/types/event.rs index 59752a2..64afd65 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -165,7 +165,7 @@ impl Parsable for Event { "PLAC" => event.place = Some(parser.take_line_value()), // TODO Citation::parse "SOUR" => event.add_citation(parser.parse_citation(parser.level)), - _ => parser.skip_current_tag(parser.level, "Event"), + _ => parser.skip_current_tag("Event"), }, Token::Level(_) => parser.set_level(), Token::LineValue(v) => { @@ -185,7 +185,7 @@ impl Parsable for Event { parser.tokenizer.next_token(); } } - _ => parser.handle_unexpected_token(parser.level, "Event"), + _ => parser.handle_unexpected_token("Event"), } } diff --git a/src/types/family.rs b/src/types/family.rs index 353421e..cb3d301 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -74,10 +74,10 @@ impl Parsable for Family { "HUSB" => family.set_individual1(parser.take_line_value()), "WIFE" => family.set_individual2(parser.take_line_value()), "CHIL" => family.add_child(parser.take_line_value()), - _ => parser.skip_current_tag(parser.level, "Family"), + _ => parser.skip_current_tag("Family"), }, Token::Level(_) => parser.set_level(), - _ => parser.handle_unexpected_token(parser.level, "FAM"), + _ => parser.handle_unexpected_token("FAM"), } } diff --git a/src/types/family_link.rs b/src/types/family_link.rs index 00574f9..3b793c6 100644 --- a/src/types/family_link.rs +++ b/src/types/family_link.rs @@ -68,10 +68,10 @@ impl Parsable for FamilyLink { match &parser.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PEDI" => link.set_pedigree(parser.take_line_value().as_str()), - _ => parser.skip_current_tag(parser.level, 
"FamilyLink"), + _ => parser.skip_current_tag("FamilyLink"), }, Token::Level(_) => parser.set_level(), - _ => parser.handle_unexpected_token(parser.level, "FamilyLink"), + _ => parser.handle_unexpected_token("FamilyLink"), } } diff --git a/src/types/header.rs b/src/types/header.rs index 743f427..c547bd8 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -86,10 +86,10 @@ impl Parsable
for Header { parser.tokenizer.next_token(); } } - _ => parser.skip_current_tag(1, "Header"), + _ => parser.skip_current_tag("Header"), }, Token::Level(_) => parser.set_level(), - _ => parser.handle_unexpected_token(1, "HEAD"), + _ => parser.handle_unexpected_token("HEAD"), } } diff --git a/src/types/individual.rs b/src/types/individual.rs index 9c2ab85..23bba5a 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -74,7 +74,7 @@ impl Parsable for Individual { parser.tokenizer.next_token(); // DATE tag individual.last_updated = Some(parser.take_line_value()); } - _ => parser.skip_current_tag(parser.level, "Individual"), + _ => parser.skip_current_tag("Individual"), }, Token::CustomTag(_) => individual.add_custom_data(parser.parse_custom_tag()), Token::Level(_) => parser.set_level(), @@ -160,7 +160,7 @@ impl Parsable for Name { "NSFX" => name.suffix = Some(parser.take_line_value()), "SPFX" => name.surname_prefix = Some(parser.take_line_value()), "SURN" => name.surname = Some(parser.take_line_value()), - _ => parser.skip_current_tag(parser.level, "Name"), + _ => parser.skip_current_tag("Name"), }, Token::Level(_) => parser.set_level(), _ => panic!("Unhandled Name Token: {:?}", parser.tokenizer.current_token), From 83cc2e349d3e380c19e7515228ba189a3daaf1a6 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 13:37:44 -0700 Subject: [PATCH 20/55] parse_citation does not need level arg --- src/parser.rs | 5 +++-- src/types/event.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 77140b3..ebd2fd5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -254,14 +254,15 @@ impl<'a> Parser<'a> { } // TODO Citation::parse - pub(crate) fn parse_citation(&mut self, level: u8) -> SourceCitation { + pub(crate) fn parse_citation(&mut self) -> SourceCitation { + let base_lvl = self.level; let mut citation = SourceCitation { xref: self.take_line_value(), page: None, }; loop { if let 
Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } diff --git a/src/types/event.rs b/src/types/event.rs index 64afd65..1fc3c62 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -164,7 +164,7 @@ impl Parsable for Event { "DATE" => event.date = Some(parser.take_line_value()), "PLAC" => event.place = Some(parser.take_line_value()), // TODO Citation::parse - "SOUR" => event.add_citation(parser.parse_citation(parser.level)), + "SOUR" => event.add_citation(parser.parse_citation()), _ => parser.skip_current_tag("Event"), }, Token::Level(_) => parser.set_level(), From d94e706d718549a6de36e26edc32ce6c668989ff Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Sat, 20 Mar 2021 13:47:12 -0700 Subject: [PATCH 21/55] remove more level fn signatures --- src/parser.rs | 31 +++++++++++++------------------ src/types/header.rs | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index ebd2fd5..7f6fbe5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -52,16 +52,7 @@ impl<'a> Parser<'a> { pub fn parse_record(&mut self) -> Gedcom { let mut data = Gedcom::default(); loop { - self.level = match self.tokenizer.current_token { - Token::Level(n) => n, - _ => panic!( - "{} Expected Level, found {:?}", - self.dbg(), - self.tokenizer.current_token - ), - }; - - self.tokenizer.next_token(); + self.set_level(); let mut pointer: Option = None; if let Token::Pointer(xref) = &self.tokenizer.current_token { @@ -147,12 +138,11 @@ impl<'a> Parser<'a> { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "DATA" => self.tokenizer.next_token(), - // TODO: cleanup to just use parse_event "EVEN" => source.data.add_event(Event::parse(self).unwrap()), "AGNC" => source.data.agency = Some(self.take_line_value()), - "ABBR" => source.abbreviation = Some(self.take_continued_text(self.level)), - "TITL" => source.title = 
Some(self.take_continued_text(self.level)), - "REPO" => source.add_repo_citation(self.parse_repo_citation(self.level)), + "ABBR" => source.abbreviation = Some(self.take_continued_text()), + "TITL" => source.title = Some(self.take_continued_text()), + "REPO" => source.add_repo_citation(self.parse_repo_citation()), _ => self.skip_current_tag("Source"), }, Token::Level(_) => self.set_level(), @@ -226,7 +216,8 @@ impl<'a> Parser<'a> { header } - fn parse_repo_citation(&mut self, level: u8) -> RepoCitation { + fn parse_repo_citation(&mut self) -> RepoCitation { + let base_lvl = self.level; let xref = self.take_line_value(); let mut citation = RepoCitation { xref, @@ -234,7 +225,7 @@ impl<'a> Parser<'a> { }; loop { if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } @@ -250,6 +241,7 @@ impl<'a> Parser<'a> { ), } } + citation } @@ -275,17 +267,19 @@ impl<'a> Parser<'a> { _ => self.handle_unexpected_token("Citation"), } } + citation } /// Takes the value of the current line including handling /// multi-line values from CONT & CONC tags. - pub(crate) fn take_continued_text(&mut self, level: u8) -> String { + pub(crate) fn take_continued_text(&mut self) -> String { + let base_lvl = self.level; let mut value = self.take_line_value(); loop { if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { + if cur_level <= base_lvl { break; } } @@ -324,6 +318,7 @@ impl<'a> Parser<'a> { ); } self.tokenizer.next_token(); + value } diff --git a/src/types/header.rs b/src/types/header.rs index c547bd8..8e9101e 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -60,7 +60,7 @@ impl Parsable
for Header { "DEST" => header.add_destination(parser.take_line_value()), "LANG" => header.language = Some(parser.take_line_value()), "FILE" => header.filename = Some(parser.take_line_value()), - "NOTE" => header.note = Some(parser.take_continued_text(1)), + "NOTE" => header.note = Some(parser.take_continued_text()), "SUBM" => header.submitter_tag = Some(parser.take_line_value()), "SUBN" => header.submission_tag = Some(parser.take_line_value()), "TIME" => { From c235cd35b3fee3151c35d1ffebc020a9dd8ea5a3 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 28 Aug 2022 16:28:45 -0500 Subject: [PATCH 22/55] Handle header structure, including header source --- Cargo.lock | 10 +- readme.md | 1 - src/parser.rs | 330 ++++++++++++++++++++++++++++++++++----- src/types/copyright.rs | 13 ++ src/types/corporation.rs | 22 +++ src/types/date.rs | 36 +++++ src/types/header.rs | 136 +++++++++++++--- src/types/mod.rs | 15 ++ src/types/note.rs | 32 ++++ src/types/place.rs | 18 +++ src/types/submitter.rs | 5 + src/types/translation.rs | 16 ++ tests/header.rs | 287 ++++++++++++++++++++++++++++++++++ tests/json_feature.rs | 30 ++++ tests/lib.rs | 7 +- 15 files changed, 890 insertions(+), 68 deletions(-) create mode 100644 src/types/copyright.rs create mode 100644 src/types/corporation.rs create mode 100644 src/types/date.rs create mode 100644 src/types/note.rs create mode 100644 src/types/place.rs create mode 100644 src/types/translation.rs create mode 100644 tests/header.rs diff --git a/Cargo.lock b/Cargo.lock index 6111758..c532f12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "gedcom" version = "0.2.2" @@ -11,9 +13,9 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.7" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" [[package]] name = "proc-macro2" @@ -61,9 +63,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.62" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" dependencies = [ "itoa", "ryu", diff --git a/readme.md b/readme.md index a197453..b35fbbe 100644 --- a/readme.md +++ b/readme.md @@ -60,7 +60,6 @@ Here are some notes about parsed data & tags. Page references are to the [Gedcom ### Top-level tags -* `HEAD.SOUR` - p.42 - The source in the header is currently skipped. * `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. * `MULTIMEDIA_RECORD` - p.26 - Multimedia (`OBJE`) is not currently parsed. * `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. 
(except in header) diff --git a/src/parser.rs b/src/parser.rs index 6473047..cf72072 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,8 +4,9 @@ use std::{panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::GedcomData; use crate::types::{ - event::HasEvents, Address, CustomData, Event, Family, FamilyLink, Gender, Header, Individual, - Name, RepoCitation, Repository, Source, SourceCitation, Submitter, + event::HasEvents, Address, Copyright, Corporation, CustomData, Date, Encoding, Event, Family, + FamilyLink, GedcomDocument, Gender, HeadPlac, HeadSourData, HeadSource, Header, Individual, + Name, Note, RepoCitation, Repository, Source, SourceCitation, Submitter, Translation, }; /// The Gedcom parser that converts the token list into a data structure @@ -82,60 +83,190 @@ impl<'a> Parser<'a> { data } - /// Parses HEAD top-level tag + /// Parses HEAD top-level tag. See + /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER fn parse_header(&mut self) -> Header { // skip over HEAD tag name self.tokenizer.next_token(); let mut header = Header::default(); - // just skipping the header for now while self.tokenizer.current_token != Token::Level(0) { match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - // TODO: CHAR.VERS - "CHAR" => header.encoding = Some(self.take_line_value()), - "CORP" => header.corporation = Some(self.take_line_value()), - "COPR" => header.copyright = Some(self.take_line_value()), - "DATE" => header.date = Some(self.take_line_value()), - "DEST" => header.add_destination(self.take_line_value()), - "LANG" => header.language = Some(self.take_line_value()), - "FILE" => header.filename = Some(self.take_line_value()), - "NOTE" => header.note = Some(self.take_continued_text(1)), - "SUBM" => header.submitter_tag = Some(self.take_line_value()), - "SUBN" => header.submission_tag = Some(self.take_line_value()), - "TIME" => { - let time = self.take_line_value(); - // assuming subtag of DATE - if 
let Some(date) = header.date { - let mut datetime = String::new(); - datetime.push_str(&date); - datetime.push_str(" "); - datetime.push_str(&time); - header.date = Some(datetime); - } else { - panic!("Expected TIME to be under DATE in header."); - } - } "GEDC" => { header = self.parse_gedcom_data(header); } - // TODO: HeaderSource - "SOUR" => { - println!("WARNING: Skipping header source."); - while self.tokenizer.current_token != Token::Level(1) { - self.tokenizer.next_token(); - } - } + "SOUR" => header.source = Some(self.parse_head_source()), + "DEST" => header.destination = Some(self.take_line_value()), + "DATE" => header.date = Some(self.parse_date(1)), + "SUBM" => header.submitter_tag = Some(self.take_line_value()), + "SUBN" => header.submission_tag = Some(self.take_line_value()), + "FILE" => header.filename = Some(self.take_line_value()), + "COPR" => header.copyright = Some(self.parse_copyright(1)), + "CHAR" => header.encoding = Some(self.parse_encoding_data()), + "LANG" => header.language = Some(self.take_line_value()), + "NOTE" => header.note = Some(self.parse_note(1)), + "PLAC" => header.place = Some(self.parse_head_plac()), _ => panic!("{} Unhandled Header Tag: {}", self.dbg(), tag), }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + header.add_custom_data(self.parse_custom_tag(tag_clone)) + } Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Header Token: {:?}", self.tokenizer.current_token), + _ => panic!( + "Unhandled Header Token: {:?}", + &self.tokenizer.current_token + ), } } header } + /// parse_head_source handles the SOUR tag in a header + fn parse_head_source(&mut self) -> HeadSource { + let mut sour = HeadSource::default(); + sour.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= 1 { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "VERS" => sour.version = 
Some(self.take_line_value()), + "NAME" => sour.name = Some(self.take_line_value()), + "CORP" => sour.corporation = Some(self.parse_corporation(2)), + "DATA" => sour.data = Some(self.parse_head_data(2)), + _ => panic!("{} Unhandled CHAR Tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!("Unexpected SOUR Token: {:?}", &self.tokenizer.current_token), + } + } + sour + } + + /// parse_corporation is for a CORP tag within the SOUR tag of a HEADER + fn parse_corporation(&mut self, level: u8) -> Corporation { + let mut corp = Corporation::default(); + corp.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "ADDR" => corp.address = Some(self.parse_address(level + 1)), + "PHON" => corp.phone = Some(self.take_line_value()), + "EMAIL" => corp.email = Some(self.take_line_value()), + "FAX" => corp.fax = Some(self.take_line_value()), + "WWW" => corp.website = Some(self.take_line_value()), + _ => panic!("{} Unhandled CORP tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!( + "Unhandled CORP tag in header: {:?}", + self.tokenizer.current_token + ), + } + } + corp + } + + /// parse_head_data parses the DATA tag + fn parse_head_data(&mut self, level: u8) -> HeadSourData { + let mut data = HeadSourData::default(); + data.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => data.date = Some(self.parse_date(level + 1)), + "COPR" => data.copyright = Some(self.parse_copyright(level + 1)), + _ => panic!("{} unhandled DATA tag in header: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ 
=> panic!( + "Unhandled SOUR tag in header: {:?}", + self.tokenizer.current_token + ), + } + } + data + } + + /// parse_head_plac handles the PLAC tag when it is present in header + fn parse_head_plac(&mut self) -> HeadPlac { + let mut h_plac = HeadPlac::default(); + // In the header, PLAC should have no payload. See + // https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC + self.tokenizer.next_token(); + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= 1 { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "FORM" => { + let form = self.take_line_value(); + let jurisdictional_titles = form.split(","); + + for t in jurisdictional_titles { + let v = t.trim(); + h_plac.push_jurisdictional_title(v.to_string()); + } + } + _ => panic!("{} Unhandled PLAC tag in header: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!( + "Unhandled PLAC tag in header: {:?}", + self.tokenizer.current_token + ), + } + } + + h_plac + } + + /// parse_copyright handles the COPR tag + fn parse_copyright(&mut self, level: u8) -> Copyright { + let mut copyright = Copyright::default(); + copyright.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" => copyright.continued = Some(self.take_line_value()), + "CONC" => copyright.continued = Some(self.take_line_value()), + _ => panic!("{} unhandled COPR tag in header: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!("Unhandled tag in COPR: {:?}", self.tokenizer.current_token), + } + } + copyright + } + /// Parses SUBM top-level tag fn parse_submitter(&mut self, level: u8, xref: Option) -> Submitter { // skip over SUBM tag name @@ -150,6 +281,8 @@ impl<'a> Parser<'a> { 
submitter.address = Some(self.parse_address(level + 1)); } "PHON" => submitter.phone = Some(self.take_line_value()), + "LANG" => submitter.language = Some(self.take_line_value()), + // "CHAN" => submitter.change_date = Some(self.take_line_value()), _ => panic!("{} Unhandled Submitter Tag: {}", self.dbg(), tag), }, Token::Level(_) => self.tokenizer.next_token(), @@ -304,15 +437,132 @@ impl<'a> Parser<'a> { CustomData { tag, value } } + /// parse_encoding_data handles the parsing of the CHARS tag + fn parse_encoding_data(&mut self) -> Encoding { + let mut encoding = Encoding::default(); + + encoding.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= 1 { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "VERS" => encoding.version = Some(self.take_line_value()), + _ => panic!("{} Unhandled CHAR Tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!( + "{} Unexpected CHAR Token: {:?}", + self.dbg(), + &self.tokenizer.current_token + ), + } + } + encoding + } + + /// parse_data handles the DATE tag + fn parse_date(&mut self, level: u8) -> Date { + let mut date = Date::default(); + date.value = Some(self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "TIME" => date.time = Some(self.take_line_value()), + _ => panic!("{} unhandled DATE tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!("Unexpected DATE token: {:?}", &self.tokenizer.current_token), + } + } + date + } + + ///parse_translation handles the TRAN tag + fn parse_translation(&mut self, level: u8) -> Translation { + let mut tran = Translation::default(); + tran.value = Some(self.take_line_value()); + + loop { + if let 
Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MIME" => tran.mime = Some(self.take_line_value()), + "LANG" => tran.language = Some(self.take_line_value()), + _ => panic!("{} unhandled NOTE tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!("Unexpected NOTE token: {:?}", &self.tokenizer.current_token), + } + } + tran + } + + ///parse_note handles the NOTE tag + fn parse_note(&mut self, level: u8) -> Note { + let mut note = Note::default(); + let mut value = String::new(); + + value.push_str(&self.take_line_value()); + + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &self.tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MIME" => note.mime = Some(self.take_line_value()), + "TRAN" => note.translation = Some(self.parse_translation(level + 1)), + "LANG" => note.language = Some(self.take_line_value()), + "CONT" | "CONC" => { + value.push('\n'); + value.push_str(&self.take_line_value()); + } + _ => panic!("{} unhandled NOTE tag: {}", self.dbg(), tag), + }, + Token::Level(_) => self.tokenizer.next_token(), + _ => panic!("Unexpected NOTE token: {:?}", &self.tokenizer.current_token), + } + } + if value != "" { + note.value = Some(value); + } + note + } + /// Handle parsing GEDC tag fn parse_gedcom_data(&mut self, mut header: Header) -> Header { + let mut gedc = GedcomDocument::default(); + // skip GEDC tag self.tokenizer.next_token(); - while self.tokenizer.current_token != Token::Level(1) { + loop { + if let Token::Level(cur_level) = self.tokenizer.current_token { + if cur_level <= 1 { + break; + } + } + match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "VERS" => header.gedcom_version = Some(self.take_line_value()), + "VERS" => gedc.version = 
Some(self.take_line_value()), // this is the only value that makes sense. warn them otherwise. "FORM" => { let form = self.take_line_value(); @@ -321,6 +571,7 @@ impl<'a> Parser<'a> { "WARNING: Unrecognized GEDCOM form. Expected LINEAGE-LINKED, found {}" , form); } + gedc.form = Some(form); } _ => panic!("{} Unhandled GEDC Tag: {}", self.dbg(), tag), }, @@ -332,6 +583,7 @@ impl<'a> Parser<'a> { ), } } + header.gedcom = Some(gedc); header } @@ -480,7 +732,7 @@ impl<'a> Parser<'a> { } match &self.tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "CONT" => { + "CONT" | "CONC" => { value.push('\n'); value.push_str(&self.take_line_value()); } diff --git a/src/types/copyright.rs b/src/types/copyright.rs new file mode 100644 index 0000000..5ce92be --- /dev/null +++ b/src/types/copyright.rs @@ -0,0 +1,13 @@ +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// A copyright statement, as appropriate for the copyright laws applicable to this data. +/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#COPR +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Copyright { + pub value: Option, + /// tag: CONT + pub continued: Option, +} + diff --git a/src/types/corporation.rs b/src/types/corporation.rs new file mode 100644 index 0000000..6d339a8 --- /dev/null +++ b/src/types/corporation.rs @@ -0,0 +1,22 @@ +use crate::types::Address; +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Corporation (tag: CORP) is the name of the business, corporation, or person that produced or +/// commissioned the product. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#CORP +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Corporation { + pub value: Option, + /// tag: ADDR + pub address: Option
, + /// tag: PHON + pub phone: Option, + /// tag: EMAIL + pub email: Option, + /// tag: FAX + pub fax: Option, + /// tag: WWW + pub website: Option, +} + diff --git a/src/types/date.rs b/src/types/date.rs new file mode 100644 index 0000000..a4f18ac --- /dev/null +++ b/src/types/date.rs @@ -0,0 +1,36 @@ +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// TODO Date should encompasses a number of date formats, e.g. approximated, period, phrase and range. +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Date { + pub value: Option, + pub time: Option, +} + +impl Date { + /// datetime returns Date and Date.time in a single string. + pub fn datetime(&self) -> Option { + match &self.time { + Some(time) => { + let mut dt = String::new(); + dt.push_str(self.value.as_ref().unwrap().as_str()); + dt.push_str(" "); + dt.push_str(&time); + Some(dt) + } + None => None, + } + } +} + +/// ChangeDate is intended to only record the last change to a record. Some systems may want to +/// manage the change process with more detail, but it is sufficient for GEDCOM purposes to +/// indicate the last time that a record was modified. +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct ChangeDate { + pub date: Option, + pub note: Option, +} diff --git a/src/types/header.rs b/src/types/header.rs index cfa2750..9ba2afa 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,37 +1,129 @@ -use crate::types::Source; +use crate::types::{Copyright, Corporation, Date, Note}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +use super::CustomData; + +/// Header (tag: HEAD) containing GEDCOM metadata. 
+/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// Header containing GEDCOM metadata pub struct Header { - pub encoding: Option, - pub copyright: Option, - pub corporation: Option, - pub date: Option, - pub destinations: Vec, - pub gedcom_version: Option, - pub language: Option, - pub filename: Option, - pub note: Option, - pub sources: Vec, + /// tag: GEDC + pub gedcom: Option, + /// tag: CHAR + pub encoding: Option, + /// tag: SOUR + pub source: Option, + /// tag: DEST, an identifier for the system expected to receive this document. + /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#DEST + pub destination: Option, + /// tag: DATE + pub date: Option, + /// tag: SUBM See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#SUBM pub submitter_tag: Option, + /// tag: SUBN pub submission_tag: Option, + /// tag: COPR + pub copyright: Option, + /// tag: LANG (HEAD-LANG), a default language which may be used to interpret any Text-typed + /// payloads that lack a specific language tag from a LANG structure. An application may choose + /// to use a different default based on its knowledge of the language preferences of the user. + /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-LANG + pub language: Option, + /// tag: FILE, the name of the GEDCOM transmission file. If the file name includes a file + /// extension it must be shown in the form (filename.ext). See Gedcom 5.5.1 specification, p. 50. 
+ pub filename: Option, + /// tag: NOTE + pub note: Option, + /// tag: PLAC + pub place: Option, + pub custom_data: Vec, } impl Header { - pub fn add_destination(&mut self, destination: String) { - self.destinations.push(destination); + pub fn add_custom_data(&mut self, data: CustomData) { + self.custom_data.push(data) } +} - pub fn add_source(&mut self, source: Source) { - self.sources.push(source); - } +/// GedcomDocument (tag: GEDC) is a container for information about the entire document. It is +/// recommended that applications write GEDC with its required subrecord VERS as the first +/// substructure of a HEAD. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#GEDC +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct GedcomDocument { + /// tag: VERS + pub version: Option, + /// tag: FORM; see Gedcom 5.5.1 specification, p. 50 + pub form: Option, +} + +/// Encoding (tag: CHAR) is a code value that represents the character set to be used to +/// interpret this data. See Gedcom 5.5.1 specification, p. 44 +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Encoding { + pub value: Option, + /// tag: VERS + pub version: Option, } -// pub struct HeaderSource { -// version: Option, -// name: Option, -// coroporation: -// } +/// HeadSource (tag: SOUR) is an identifier for the product producing the gedcom data. A +/// registration process for these identifiers existed for a time, but no longer does. If an +/// existing identifier is known, it should be used. Otherwise, a URI owned by the product should +/// be used instead. 
See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR
+#[derive(Debug, Default)]
+#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
+pub struct HeadSource {
+    pub value: Option,
+    /// tag: VERS
+    pub version: Option,
+    /// tag: NAME
+    pub name: Option,
+    /// tag: CORP
+    pub corporation: Option,
+    /// tag: DATA
+    pub data: Option,
+}
+
+/// The electronic data source or digital repository from which this dataset was exported. The
+/// payload is the name of that source, with substructures providing additional details about the
+/// source (not the export). See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR-DATA
+#[derive(Debug, Default)]
+#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
+pub struct HeadSourData {
+    pub value: Option,
+    /// tag: DATE
+    pub date: Option,
+    /// tag: COPR
+    pub copyright: Option,
+}
+
+/// HeadPlace (tag: PLAC) is a placeholder for providing a default PLAC.FORM, and must not have
+/// a payload. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC
+#[derive(Debug, Default)]
+#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
+pub struct HeadPlac {
+    /// form (tag: FORM) is a comma-separated list of jurisdictional titles (e.g. City, County,
+    /// State, Country). It has the same number of elements and in the same order as the PLAC
+    /// structure. As with PLAC, this shall be ordered from lowest to highest jurisdiction.
+    /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#PLAC-FORM
+    pub form: Vec,
+}
+
+impl HeadPlac {
+    pub fn push_jurisdictional_title(&mut self, title: String) {
+        self.form.push(title);
+    }
+
+    // Adhering to "lowest to highest jurisdiction" is the responsibility of the
+    // Gedcom author, but methods for reordering elements might still be useful. 
+ pub fn insert_jurisdictional_title(&mut self, index: usize, title: String) { + self.form.insert(index, title); + } + + pub fn remove_jurisdictional_title(&mut self, index: usize) { + self.form.remove(index); + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs index 07dc0e5..5e79810 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -10,6 +10,9 @@ use serde::{Deserialize, Serialize}; pub mod event; pub use event::{Event, EventType}; +pub mod date; +pub use date::{Date, ChangeDate}; + mod address; pub use address::*; @@ -31,6 +34,18 @@ pub use submitter::*; mod source; pub use source::*; +mod note; +pub use note::*; + +mod translation; +pub use translation::*; + +mod copyright; +pub use copyright::*; + +mod corporation; +pub use corporation::*; + // TODO /// Multimedia item #[derive(Debug)] diff --git a/src/types/note.rs b/src/types/note.rs new file mode 100644 index 0000000..01bd550 --- /dev/null +++ b/src/types/note.rs @@ -0,0 +1,32 @@ +use crate::types::{Translation, Source}; +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Note (tag:NOTE) is a note_structure, containing additional information provided by the +/// submitter for understanding the enclosing data. +/// +/// When a substructure of HEAD, it should describe the contents of the document in terms of +/// “ancestors or descendants of” so that the person receiving the data knows what genealogical +/// information the document contains. +/// +/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#NOTE +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Note { + pub value: Option, + /// tag: MIME, indicates the media type of the payload of the superstructure, as defined by BCP + /// 13. 
As of version 7.0, only 2 media types are supported by this structure: text/plain and + /// text/html + pub mime: Option, + /// tag: TRAN, a type of TRAN for unstructured human-readable text, such as is found in NOTE + /// and SNOTE payloads. + pub translation: Option, + /// tag: SOUR, a citation indicating that the pointed-to source record supports the claims made + /// in the superstructure. See + /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#SOURCE_CITATION + pub citation: Option, + /// tag: LANG, The primary human language of the superstructure. The primary language in which + /// the Text-typed payloads of the superstructure and its substructures appear. See + /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#LANG + pub language: Option, +} diff --git a/src/types/place.rs b/src/types/place.rs new file mode 100644 index 0000000..c74812a --- /dev/null +++ b/src/types/place.rs @@ -0,0 +1,18 @@ +use crate::types::{Address, Date, Note}; +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// The principal place in which the superstructure’s subject occurred, represented as a List of +/// jurisdictional entities in a sequence from the lowest to the highest jurisdiction. As with +/// other lists, the jurisdictions are separated by commas. Any jurisdiction’s name that is missing +/// is still accounted for by an empty string in the list. +/// +/// The type of each jurisdiction is given in the PLAC.FORM substructure, if present, or in the +/// HEAD.PLAC.FORM structure. If neither is present, the jurisdictional types are unspecified +/// beyond the lowest-to-highest order noted above. 
+#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Place { + pub value: Option, + pub form: Option, +} diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 73e403e..0af8d69 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -16,6 +16,8 @@ pub struct Submitter { pub address: Option
, /// Phone number of the submitter pub phone: Option, + /// TODO + pub language: Option, } impl Submitter { @@ -27,6 +29,9 @@ impl Submitter { name: None, address: None, phone: None, + language: None, } } } + + diff --git a/src/types/translation.rs b/src/types/translation.rs new file mode 100644 index 0000000..bb1adee --- /dev/null +++ b/src/types/translation.rs @@ -0,0 +1,16 @@ +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// Translation (tag:TRAN) is a type of TRAN for unstructured human-readable text, such as +/// is found in NOTE and SNOTE payloads. Each NOTE-TRAN must have either a LANG substructure or a +/// MIME substructure or both. If either is missing, it is assumed to have the same value as the +/// superstructure. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#NOTE-TRAN +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Translation { + pub value: Option, + /// tag:MIME + pub mime: Option, + /// tag:LANG + pub language: Option, +} diff --git a/tests/header.rs b/tests/header.rs new file mode 100644 index 0000000..2bd2cd6 --- /dev/null +++ b/tests/header.rs @@ -0,0 +1,287 @@ +#[cfg(test)] +mod tests { + use gedcom::parser::Parser; + + #[test] + fn parse_head_gedc() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 2 FORM LINEAGE-LINKED\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let head_gedc = data.header.gedcom.unwrap(); + assert_eq!(head_gedc.version.unwrap(), "5.5"); + assert_eq!(head_gedc.form.unwrap(), "LINEAGE-LINKED"); + } + + #[test] + fn parse_head_sour() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 SOUR SOURCE_NAME\n\ + 2 VERS Version number of source-program\n\ + 2 NAME Name of source-program\n\ + 2 CORP Corporation name\n\ + 3 ADDR 2 Harrison Street\n\ + 4 CONT 7th Floor\n\ + 4 CONT Suite 175\n\ + 4 ADR1 2 Harrison Street\n\ + 4 ADR2 7th Floor\n\ + 4 ADR3 Suite 
175\n\ + 4 CITY San Francisco\n\ + 4 STAE California\n\ + 4 POST 94105\n\ + 4 CTRY USA\n\ + 3 PHON Corporation phone number\n\ + 2 DATA Name of source data\n\ + 3 DATE 1 JAN 1998\n\ + 3 COPR Copyright of source data\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let sour = data.header.source.unwrap(); + assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); + + let vers = sour.version.unwrap(); + assert_eq!(vers, "Version number of source-program"); + + let name = sour.name.unwrap(); + assert_eq!(name, "Name of source-program"); + + let corp = sour.corporation.unwrap(); + assert_eq!(corp.value.unwrap(), "Corporation name"); + + let corp_addr = corp.address.unwrap(); + assert_eq!( + corp_addr.value.unwrap(), + "2 Harrison Street\n7th Floor\nSuite 175" + ); + assert_eq!(corp_addr.adr1.unwrap(), "2 Harrison Street"); + assert_eq!(corp_addr.adr2.unwrap(), "7th Floor"); + assert_eq!(corp_addr.adr3.unwrap(), "Suite 175"); + assert_eq!(corp_addr.city.unwrap(), "San Francisco"); + assert_eq!(corp_addr.state.unwrap(), "California"); + assert_eq!(corp_addr.post.unwrap(), "94105"); + assert_eq!(corp_addr.country.unwrap(), "USA"); + + let corp_phon = corp.phone.unwrap(); + assert_eq!(corp_phon, "Corporation phone number"); + + let sour_data = sour.data.unwrap(); + assert_eq!(sour_data.value.unwrap(), "Name of source data"); + assert_eq!(sour_data.date.unwrap().value.unwrap(), "1 JAN 1998"); + assert_eq!( + sour_data.copyright.unwrap().value.unwrap(), + "Copyright of source data" + ); + } + + #[test] + fn parse_head_dest() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 DEST Destination of transmission\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + assert_eq!( + data.header.destination.unwrap(), + "Destination of transmission" + ); + } + + #[test] + fn parse_head_date() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 DATE 1 JAN 1998\n\ + 2 
TIME 13:57:24.80\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_date = data.header.date.unwrap(); + assert_eq!(h_date.value.unwrap(), "1 JAN 1998"); + assert_eq!(h_date.time.unwrap(), "13:57:24.80"); + } + + #[test] + fn parse_head_subm() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 SUBM @SUBMITTER@\n\ + 1 SUBN @SUBMISSION@\n\ + 1 FILE ALLGED.GED\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_subm = data.header.submitter_tag.unwrap(); + assert_eq!(h_subm.as_str(), "@SUBMITTER@"); + } + + #[test] + fn parse_head_subn() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 SUBM @SUBMITTER@\n\ + 1 SUBN @SUBMISSION@\n\ + 1 FILE ALLGED.GED\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_subn = data.header.submission_tag.unwrap(); + assert_eq!(h_subn.as_str(), "@SUBMISSION@"); + } + + #[test] + fn parse_head_file() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 SUBM @SUBMITTER@\n\ + 1 SUBN @SUBMISSION@\n\ + 1 FILE ALLGED.GED\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_file = data.header.filename.unwrap(); + assert_eq!(h_file.as_str(), "ALLGED.GED"); + } + + #[test] + fn parse_head_copr() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 COPR (C) 1997-2000 by H. Eichmann.\n\ + 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_copr = data.header.copyright.unwrap(); + assert_eq!(h_copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); + assert_eq!( + h_copr.continued.unwrap(), + "You can use and distribute this file freely as long as you do not charge for it." 
+ ); + } + + #[test] + fn parse_head_char() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 CHAR ASCII\n\ + 2 VERS Version number of ASCII (whatever it means)\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_char = data.header.encoding.unwrap(); + assert_eq!(h_char.value.unwrap(), "ASCII"); + assert_eq!( + h_char.version.unwrap(), + "Version number of ASCII (whatever it means)" + ); + } + + #[test] + fn parse_head_lang() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 LANG language + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_lang = data.header.language.unwrap(); + assert_eq!(h_lang.as_str(), "language"); + } + + #[test] + fn parse_head_plac() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 PLAC\n\ + 2 FORM City, County, State, Country\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_plac = data.header.place.unwrap(); + assert_eq!(h_plac.form[0], "City"); + assert_eq!(h_plac.form[1], "County"); + assert_eq!(h_plac.form[2], "State"); + assert_eq!(h_plac.form[3], "Country"); + } + + #[test] + fn parse_head_note() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 1 NOTE A general note about this file:\n\ + 2 CONT It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1:\n\ + 2 CONT His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given),\n\ + 2 CONT adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7).\n\ + 2 CONT In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used.\n\ + 2 CONT All other individuals/families contain no data. 
Note, that many data tags can appear more than once\n\ + 2 CONT (in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'.\n\ + 2 CONT The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation\n\ + 2 CONT of this file and will change in future Versions!\n\ + 2 CONT This file is created by H. Eichmann: h.eichmann@@gmx.de. Feel free to copy and use it for any\n\ + 2 CONT non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used.\n\ + 2 CONT Copyright: gedcom@@gedcom.org\n\ + 2 CONT Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom.\n\ + 2 CONT Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit).\n\ + 2 CONT This @@ (commercial at) character may only appear ONCE!\n\ + 2 CONT Note continued here. 
The word TE\n\ + 2 CONC ST should not be broken!\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + + let h_note = data.header.note.unwrap(); + assert_eq!(h_note.value.unwrap().chars().count(), 1441); + } +} diff --git a/tests/json_feature.rs b/tests/json_feature.rs index 57232a9..19b7ad8 100644 --- a/tests/json_feature.rs +++ b/tests/json_feature.rs @@ -52,6 +52,36 @@ mod json_feature_tests { let gedcom_content: String = read_relative("./tests/fixtures/simple.ged"); let data = parse(gedcom_content.chars()); + assert_eq!(serde_json::to_string_pretty(&data.header).unwrap(), "\ +{ + \"gedcom\": { + \"version\": \"5.5\", + \"form\": \"Lineage-Linked\" + }, + \"encoding\": { + \"value\": \"ASCII\", + \"version\": null + }, + \"source\": { + \"value\": \"ID_OF_CREATING_FILE\", + \"version\": null, + \"name\": null, + \"corporation\": null, + \"data\": null + }, + \"destination\": null, + \"date\": null, + \"submitter_tag\": \"@SUBMITTER@\", + \"submission_tag\": null, + \"copyright\": null, + \"language\": null, + \"filename\": null, + \"note\": null, + \"place\": null, + \"custom_data\": [] +}\ + "); + assert_eq!( serde_json::to_string_pretty(&data.families).unwrap(), "[ diff --git a/tests/lib.rs b/tests/lib.rs index 270cb2f..2127ea0 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -26,9 +26,12 @@ mod tests { assert_eq!(data.submitters.len(), 1); // header - assert_eq!(data.header.encoding.unwrap().as_str(), "ASCII"); + assert_eq!( + data.header.encoding.unwrap().value.unwrap().as_str(), + "ASCII" + ); assert_eq!(data.header.submitter_tag.unwrap().as_str(), "@SUBMITTER@"); - assert_eq!(data.header.gedcom_version.unwrap().as_str(), "5.5"); + assert_eq!(data.header.gedcom.unwrap().version.unwrap(), "5.5"); // names assert_eq!( From 9e4b8c9e2acb6dd02910ea6310bd542105c496a1 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Thu, 3 Nov 2022 22:13:29 -0500 Subject: [PATCH 23/55] Add Parse trait and implement types. 
--- src/parser.rs | 798 ++------------------------------------- src/tree.rs | 2 +- src/types/address.rs | 64 ++++ src/types/copyright.rs | 38 ++ src/types/corporation.rs | 46 ++- src/types/date.rs | 38 ++ src/types/event.rs | 51 ++- src/types/family.rs | 54 ++- src/types/header.rs | 262 ++++++++++++- src/types/individual.rs | 155 +++++++- src/types/mod.rs | 37 +- src/types/note.rs | 53 ++- src/types/repository.rs | 101 +++++ src/types/source.rs | 93 ++++- src/types/submitter.rs | 46 ++- src/types/translation.rs | 43 +++ src/util.rs | 70 ++++ tests/header.rs | 24 +- tests/lib.rs | 8 +- 19 files changed, 1107 insertions(+), 876 deletions(-) create mode 100644 src/types/repository.rs diff --git a/src/parser.rs b/src/parser.rs index cf72072..d4ed1af 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,11 +3,14 @@ use std::{panic, str::Chars}; use crate::tokenizer::{Token, Tokenizer}; use crate::tree::GedcomData; -use crate::types::{ - event::HasEvents, Address, Copyright, Corporation, CustomData, Date, Encoding, Event, Family, - FamilyLink, GedcomDocument, Gender, HeadPlac, HeadSourData, HeadSource, Header, Individual, - Name, Note, RepoCitation, Repository, Source, SourceCitation, Submitter, Translation, -}; +use crate::types::{Family, Header, Individual, Repository, Source, Submitter}; +use crate::util::{dbg, parse_custom_tag}; + +/// Parse converts a subset of a token list into a type's data structure. 
+pub trait Parse { + /// parse does the actual parsing of a subset of a token list + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8); +} /// The Gedcom parser that converts the token list into a data structure pub struct Parser<'a> { @@ -31,7 +34,7 @@ impl<'a> Parser<'a> { Token::Level(n) => n, _ => panic!( "{} Expected Level, found {:?}", - self.dbg(), + dbg(&self.tokenizer), self.tokenizer.current_token ), }; @@ -46,25 +49,27 @@ impl<'a> Parser<'a> { if let Token::Tag(tag) = &self.tokenizer.current_token { match tag.as_str() { - "HEAD" => data.header = self.parse_header(), - "FAM" => data.add_family(self.parse_family(level, pointer)), - "INDI" => data.add_individual(self.parse_individual(level, pointer)), - "REPO" => data.add_repository(self.parse_repository(level, pointer)), - "SOUR" => data.add_source(self.parse_source(level, pointer)), - "SUBM" => data.add_submitter(self.parse_submitter(level, pointer)), + "HEAD" => data.header = Some(Header::new(&mut self.tokenizer, 0)), + "FAM" => data.add_family(Family::new(&mut self.tokenizer, 0, pointer)), + "INDI" => { + data.add_individual(Individual::new(&mut self.tokenizer, level, pointer)) + } + "REPO" => data.add_repository(Repository::new(&mut self.tokenizer, level, pointer)), + "SOUR" => data.add_source(Source::new(&mut self.tokenizer, level, pointer)), + "SUBM" => data.add_submitter(Submitter::new(&mut self.tokenizer, 0, pointer)), "TRLR" => break, _ => { - println!("{} Unhandled tag {}", self.dbg(), tag); + println!("{} Unhandled tag {}", dbg(&self.tokenizer), tag); self.tokenizer.next_token(); } }; } else if let Token::CustomTag(tag) = &self.tokenizer.current_token { // TODO let tag_clone = tag.clone(); - let custom_data = self.parse_custom_tag(tag_clone); + let custom_data = parse_custom_tag(&mut self.tokenizer, tag_clone); println!( "{} Skipping top-level custom tag: {:?}", - self.dbg(), + dbg(&self.tokenizer), custom_data ); while self.tokenizer.current_token != Token::Level(0) { @@ -73,773 +78,12 
@@ impl<'a> Parser<'a> { } else { println!( "{} Unhandled token {:?}", - self.dbg(), + dbg(&self.tokenizer), self.tokenizer.current_token ); self.tokenizer.next_token(); }; } - - data - } - - /// Parses HEAD top-level tag. See - /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER - fn parse_header(&mut self) -> Header { - // skip over HEAD tag name - self.tokenizer.next_token(); - - let mut header = Header::default(); - - while self.tokenizer.current_token != Token::Level(0) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "GEDC" => { - header = self.parse_gedcom_data(header); - } - "SOUR" => header.source = Some(self.parse_head_source()), - "DEST" => header.destination = Some(self.take_line_value()), - "DATE" => header.date = Some(self.parse_date(1)), - "SUBM" => header.submitter_tag = Some(self.take_line_value()), - "SUBN" => header.submission_tag = Some(self.take_line_value()), - "FILE" => header.filename = Some(self.take_line_value()), - "COPR" => header.copyright = Some(self.parse_copyright(1)), - "CHAR" => header.encoding = Some(self.parse_encoding_data()), - "LANG" => header.language = Some(self.take_line_value()), - "NOTE" => header.note = Some(self.parse_note(1)), - "PLAC" => header.place = Some(self.parse_head_plac()), - _ => panic!("{} Unhandled Header Tag: {}", self.dbg(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - header.add_custom_data(self.parse_custom_tag(tag_clone)) - } - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Header Token: {:?}", - &self.tokenizer.current_token - ), - } - } - header - } - - /// parse_head_source handles the SOUR tag in a header - fn parse_head_source(&mut self) -> HeadSource { - let mut sour = HeadSource::default(); - sour.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= 1 { - break; - } - } - match &self.tokenizer.current_token 
{ - Token::Tag(tag) => match tag.as_str() { - "VERS" => sour.version = Some(self.take_line_value()), - "NAME" => sour.name = Some(self.take_line_value()), - "CORP" => sour.corporation = Some(self.parse_corporation(2)), - "DATA" => sour.data = Some(self.parse_head_data(2)), - _ => panic!("{} Unhandled CHAR Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unexpected SOUR Token: {:?}", &self.tokenizer.current_token), - } - } - sour - } - - /// parse_corporation is for a CORP tag within the SOUR tag of a HEADER - fn parse_corporation(&mut self, level: u8) -> Corporation { - let mut corp = Corporation::default(); - corp.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "ADDR" => corp.address = Some(self.parse_address(level + 1)), - "PHON" => corp.phone = Some(self.take_line_value()), - "EMAIL" => corp.email = Some(self.take_line_value()), - "FAX" => corp.fax = Some(self.take_line_value()), - "WWW" => corp.website = Some(self.take_line_value()), - _ => panic!("{} Unhandled CORP tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled CORP tag in header: {:?}", - self.tokenizer.current_token - ), - } - } - corp - } - - /// parse_head_data parses the DATA tag - fn parse_head_data(&mut self, level: u8) -> HeadSourData { - let mut data = HeadSourData::default(); - data.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => data.date = Some(self.parse_date(level + 1)), - "COPR" => data.copyright = Some(self.parse_copyright(level + 1)), - _ => panic!("{} unhandled DATA tag in header: {}", 
self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled SOUR tag in header: {:?}", - self.tokenizer.current_token - ), - } - } data } - - /// parse_head_plac handles the PLAC tag when it is present in header - fn parse_head_plac(&mut self) -> HeadPlac { - let mut h_plac = HeadPlac::default(); - // In the header, PLAC should have no payload. See - // https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC - self.tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= 1 { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "FORM" => { - let form = self.take_line_value(); - let jurisdictional_titles = form.split(","); - - for t in jurisdictional_titles { - let v = t.trim(); - h_plac.push_jurisdictional_title(v.to_string()); - } - } - _ => panic!("{} Unhandled PLAC tag in header: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled PLAC tag in header: {:?}", - self.tokenizer.current_token - ), - } - } - - h_plac - } - - /// parse_copyright handles the COPR tag - fn parse_copyright(&mut self, level: u8) -> Copyright { - let mut copyright = Copyright::default(); - copyright.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" => copyright.continued = Some(self.take_line_value()), - "CONC" => copyright.continued = Some(self.take_line_value()), - _ => panic!("{} unhandled COPR tag in header: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled tag in COPR: {:?}", self.tokenizer.current_token), - } - } - copyright - } - - /// Parses SUBM top-level tag - fn parse_submitter(&mut self, level: u8, xref: Option) -> 
Submitter { - // skip over SUBM tag name - self.tokenizer.next_token(); - - let mut submitter = Submitter::new(xref); - while self.tokenizer.current_token != Token::Level(level) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => submitter.name = Some(self.take_line_value()), - "ADDR" => { - submitter.address = Some(self.parse_address(level + 1)); - } - "PHON" => submitter.phone = Some(self.take_line_value()), - "LANG" => submitter.language = Some(self.take_line_value()), - // "CHAN" => submitter.change_date = Some(self.take_line_value()), - _ => panic!("{} Unhandled Submitter Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Submitter Token: {:?}", - self.tokenizer.current_token - ), - } - } - // println!("found submitter:\n{:#?}", submitter); - submitter - } - - /// Parses INDI top-level tag - fn parse_individual(&mut self, level: u8, xref: Option) -> Individual { - // skip over INDI tag name - self.tokenizer.next_token(); - let mut individual = Individual::new(xref); - - while self.tokenizer.current_token != Token::Level(level) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => individual.name = Some(self.parse_name(level + 1)), - "SEX" => individual.sex = self.parse_gender(), - "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" - | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" - | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { - let tag_clone = tag.clone(); - individual.add_event(self.parse_event(tag_clone.as_str(), level + 1)); - } - "FAMC" | "FAMS" => { - let tag_clone = tag.clone(); - individual - .add_family(self.parse_family_link(tag_clone.as_str(), level + 1)); - } - "CHAN" => { - // assuming it always only has a single DATE subtag - self.tokenizer.next_token(); // level - self.tokenizer.next_token(); // DATE tag - individual.last_updated 
= Some(self.take_line_value()); - } - _ => panic!("{} Unhandled Individual Tag: {}", self.dbg(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - individual.add_custom_data(self.parse_custom_tag(tag_clone)) - } - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Individual Token: {:?}", - self.tokenizer.current_token - ), - } - } - // println!("found individual:\n{:#?}", individual); - individual - } - - /// Parses FAM top-level tag - fn parse_family(&mut self, level: u8, xref: Option) -> Family { - // skip over FAM tag name - self.tokenizer.next_token(); - let mut family = Family::new(xref); - - while self.tokenizer.current_token != Token::Level(level) { - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MARR" => family.add_event(self.parse_event("MARR", level + 1)), - "HUSB" => family.set_individual1(self.take_line_value()), - "WIFE" => family.set_individual2(self.take_line_value()), - "CHIL" => family.add_child(self.take_line_value()), - _ => panic!("{} Unhandled Family Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Family Token: {:?}", self.tokenizer.current_token), - } - } - - // println!("found family:\n{:#?}", family); - family - } - - fn parse_source(&mut self, level: u8, xref: Option) -> Source { - // skip SOUR tag - self.tokenizer.next_token(); - let mut source = Source::new(xref); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATA" => self.tokenizer.next_token(), - "EVEN" => { - let events_recorded = self.take_line_value(); - let mut event = self.parse_event("OTHER", level + 2); - event.with_source_data(events_recorded); - source.data.add_event(event); - } - "AGNC" => source.data.agency = Some(self.take_line_value()), - "ABBR" => source.abbreviation = 
Some(self.take_continued_text(level + 1)), - "TITL" => source.title = Some(self.take_continued_text(level + 1)), - "REPO" => source.add_repo_citation(self.parse_repo_citation(level + 1)), - _ => panic!("{} Unhandled Source Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Source Token: {:?}", self.tokenizer.current_token), - } - } - - // println!("found source:\n{:#?}", source); - source - } - - /// Parses REPO top-level tag. - fn parse_repository(&mut self, level: u8, xref: Option) -> Repository { - // skip REPO tag - self.tokenizer.next_token(); - let mut repo = Repository { - xref, - name: None, - address: None, - }; - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => repo.name = Some(self.take_line_value()), - "ADDR" => repo.address = Some(self.parse_address(level + 1)), - _ => panic!("{} Unhandled Repository Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Repository Token: {:?}", - self.tokenizer.current_token - ), - } - } - // println!("found repositiory:\n{:#?}", repo); - repo - } - - fn parse_custom_tag(&mut self, tag: String) -> CustomData { - let value = self.take_line_value(); - CustomData { tag, value } - } - - /// parse_encoding_data handles the parsing of the CHARS tag - fn parse_encoding_data(&mut self) -> Encoding { - let mut encoding = Encoding::default(); - - encoding.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= 1 { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "VERS" => encoding.version = Some(self.take_line_value()), - _ => panic!("{} Unhandled CHAR Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => 
self.tokenizer.next_token(), - _ => panic!( - "{} Unexpected CHAR Token: {:?}", - self.dbg(), - &self.tokenizer.current_token - ), - } - } - encoding - } - - /// parse_data handles the DATE tag - fn parse_date(&mut self, level: u8) -> Date { - let mut date = Date::default(); - date.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "TIME" => date.time = Some(self.take_line_value()), - _ => panic!("{} unhandled DATE tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unexpected DATE token: {:?}", &self.tokenizer.current_token), - } - } - date - } - - ///parse_translation handles the TRAN tag - fn parse_translation(&mut self, level: u8) -> Translation { - let mut tran = Translation::default(); - tran.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MIME" => tran.mime = Some(self.take_line_value()), - "LANG" => tran.language = Some(self.take_line_value()), - _ => panic!("{} unhandled NOTE tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unexpected NOTE token: {:?}", &self.tokenizer.current_token), - } - } - tran - } - - ///parse_note handles the NOTE tag - fn parse_note(&mut self, level: u8) -> Note { - let mut note = Note::default(); - let mut value = String::new(); - - value.push_str(&self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MIME" => note.mime = Some(self.take_line_value()), - "TRAN" => note.translation = 
Some(self.parse_translation(level + 1)), - "LANG" => note.language = Some(self.take_line_value()), - "CONT" | "CONC" => { - value.push('\n'); - value.push_str(&self.take_line_value()); - } - _ => panic!("{} unhandled NOTE tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unexpected NOTE token: {:?}", &self.tokenizer.current_token), - } - } - if value != "" { - note.value = Some(value); - } - note - } - - /// Handle parsing GEDC tag - fn parse_gedcom_data(&mut self, mut header: Header) -> Header { - let mut gedc = GedcomDocument::default(); - - // skip GEDC tag - self.tokenizer.next_token(); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= 1 { - break; - } - } - - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "VERS" => gedc.version = Some(self.take_line_value()), - // this is the only value that makes sense. warn them otherwise. - "FORM" => { - let form = self.take_line_value(); - if &form.to_uppercase() != "LINEAGE-LINKED" { - println!( - "WARNING: Unrecognized GEDCOM form. 
Expected LINEAGE-LINKED, found {}" - , form); - } - gedc.form = Some(form); - } - _ => panic!("{} Unhandled GEDC Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "{} Unexpected GEDC Token: {:?}", - self.dbg(), - &self.tokenizer.current_token - ), - } - } - header.gedcom = Some(gedc); - header - } - - fn parse_family_link(&mut self, tag: &str, level: u8) -> FamilyLink { - let xref = self.take_line_value(); - let mut link = FamilyLink::new(xref, tag); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PEDI" => link.set_pedigree(self.take_line_value().as_str()), - _ => panic!("{} Unhandled FamilyLink Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled FamilyLink Token: {:?}", - self.tokenizer.current_token - ), - } - } - - link - } - - fn parse_repo_citation(&mut self, level: u8) -> RepoCitation { - let xref = self.take_line_value(); - let mut citation = RepoCitation { - xref, - call_number: None, - }; - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CALN" => citation.call_number = Some(self.take_line_value()), - _ => panic!("{} Unhandled RepoCitation Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled RepoCitation Token: {:?}", - self.tokenizer.current_token - ), - } - } - citation - } - - fn parse_gender(&mut self) -> Gender { - self.tokenizer.next_token(); - let gender: Gender; - if let Token::LineValue(gender_string) = &self.tokenizer.current_token { - gender = match gender_string.as_str() { - "M" => Gender::Male, - "F" => Gender::Female, - "N" => Gender::Nonbinary, - "U" => Gender::Unknown, - _ 
=> panic!("{} Unknown gender value {}", self.dbg(), gender_string), - }; - } else { - panic!( - "Expected gender LineValue, found {:?}", - self.tokenizer.current_token - ); - } - self.tokenizer.next_token(); - gender - } - - fn parse_name(&mut self, level: u8) -> Name { - let mut name = Name::default(); - name.value = Some(self.take_line_value()); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "GIVN" => name.given = Some(self.take_line_value()), - "NPFX" => name.prefix = Some(self.take_line_value()), - "NSFX" => name.suffix = Some(self.take_line_value()), - "SPFX" => name.surname_prefix = Some(self.take_line_value()), - "SURN" => name.surname = Some(self.take_line_value()), - _ => panic!("{} Unhandled Name Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Name Token: {:?}", self.tokenizer.current_token), - } - } - - name - } - - fn parse_event(&mut self, tag: &str, level: u8) -> Event { - self.tokenizer.next_token(); - let mut event = Event::from_tag(tag); - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => event.date = Some(self.take_line_value()), - "PLAC" => event.place = Some(self.take_line_value()), - "SOUR" => event.add_citation(self.parse_citation(level + 1)), - _ => panic!("{} Unhandled Event Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!("Unhandled Event Token: {:?}", self.tokenizer.current_token), - } - } - event - } - - /// Parses ADDR tag - fn parse_address(&mut self, level: u8) -> Address { - // skip ADDR tag - self.tokenizer.next_token(); - let mut address = Address::default(); - let mut value = String::new(); - - // handle value on 
ADDR line - if let Token::LineValue(addr) = &self.tokenizer.current_token { - value.push_str(addr); - self.tokenizer.next_token(); - } - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" | "CONC" => { - value.push('\n'); - value.push_str(&self.take_line_value()); - } - "ADR1" => address.adr1 = Some(self.take_line_value()), - "ADR2" => address.adr2 = Some(self.take_line_value()), - "ADR3" => address.adr3 = Some(self.take_line_value()), - "CITY" => address.city = Some(self.take_line_value()), - "STAE" => address.state = Some(self.take_line_value()), - "POST" => address.post = Some(self.take_line_value()), - "CTRY" => address.country = Some(self.take_line_value()), - _ => panic!("{} Unhandled Address Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Address Token: {:?}", - self.tokenizer.current_token - ), - } - } - - if &value != "" { - address.value = Some(value); - } - - address - } - - fn parse_citation(&mut self, level: u8) -> SourceCitation { - let mut citation = SourceCitation { - xref: self.take_line_value(), - page: None, - }; - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PAGE" => citation.page = Some(self.take_line_value()), - _ => panic!("{} Unhandled Citation Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Citation Token: {:?}", - self.tokenizer.current_token - ), - } - } - citation - } - - /// Takes the value of the current line including handling - /// multi-line values from CONT & CONC tags. 
- fn take_continued_text(&mut self, level: u8) -> String { - let mut value = self.take_line_value(); - - loop { - if let Token::Level(cur_level) = self.tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &self.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" => { - value.push('\n'); - value.push_str(&self.take_line_value()) - } - "CONC" => { - value.push(' '); - value.push_str(&self.take_line_value()) - } - _ => panic!("{} Unhandled Continuation Tag: {}", self.dbg(), tag), - }, - Token::Level(_) => self.tokenizer.next_token(), - _ => panic!( - "Unhandled Continuation Token: {:?}", - self.tokenizer.current_token - ), - } - } - - value - } - - /// Grabs and returns to the end of the current line as a String - fn take_line_value(&mut self) -> String { - let value: String; - self.tokenizer.next_token(); - - if let Token::LineValue(val) = &self.tokenizer.current_token { - value = val.to_string(); - } else { - panic!( - "{} Expected LineValue, found {:?}", - self.dbg(), - self.tokenizer.current_token - ); - } - self.tokenizer.next_token(); - value - } - - /// Debug function displaying GEDCOM line number of error message. - fn dbg(&self) -> String { - format!("line {}:", self.tokenizer.line) - } } diff --git a/src/tree.rs b/src/tree.rs index e8210e2..3d0f82f 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; /// The data structure representing all the data within a gedcom file pub struct GedcomData { /// Header containing file metadata - pub header: Header, + pub header: Option
, /// List of submitters of the facts pub submitters: Vec, /// Individuals within the family tree diff --git a/src/types/address.rs b/src/types/address.rs index 7fe22ef..5d4fe1e 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -2,6 +2,12 @@ use serde::{Deserialize, Serialize}; use std::fmt; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + util::{dbg, take_line_value}, +}; + /// Physical address at which a fact occurs #[derive(Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -16,6 +22,64 @@ pub struct Address { pub country: Option, } +impl Address { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Address { + let mut addr = Address::default(); + addr.parse(tokenizer, level); + addr + } +} + +impl Parse for Address { + /// parse handles ADDR tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + + // skip ADDR tag + tokenizer.next_token(); + + let mut value = String::new(); + + // handle value on ADDR line + if let Token::LineValue(addr) = &tokenizer.current_token { + value.push_str(&addr); + tokenizer.next_token(); + } + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" | "CONC" => { + value.push('\n'); + value.push_str(&take_line_value(tokenizer)); + } + "ADR1" => self.adr1 = Some(take_line_value(tokenizer)), + "ADR2" => self.adr2 = Some(take_line_value(tokenizer)), + "ADR3" => self.adr3 = Some(take_line_value(tokenizer)), + "CITY" => self.city = Some(take_line_value(tokenizer)), + "STAE" => self.state = Some(take_line_value(tokenizer)), + "POST" => self.post = Some(take_line_value(tokenizer)), + "CTRY" => self.country = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled Address Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled Address Token: {:?}", + tokenizer.current_token 
+ ), + } + } + + if &value != "" { + self.value = Some(value); + } + } +} + impl fmt::Debug for Address { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut debug = f.debug_struct("Address"); diff --git a/src/types/copyright.rs b/src/types/copyright.rs index 5ce92be..c36ee25 100644 --- a/src/types/copyright.rs +++ b/src/types/copyright.rs @@ -1,6 +1,12 @@ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + util::{dbg, take_line_value}, +}; + /// A copyright statement, as appropriate for the copyright laws applicable to this data. /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#COPR #[derive(Debug, Default)] @@ -11,3 +17,35 @@ pub struct Copyright { pub continued: Option, } +impl Copyright { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Copyright { + let mut copr = Copyright::default(); + copr.parse(tokenizer, level); + copr + } +} + +impl Parse for Copyright { + /// parse the COPR tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" => self.continued = Some(take_line_value(tokenizer)), + "CONC" => self.continued = Some(take_line_value(tokenizer)), + _ => panic!("{} unhandled COPR tag in header: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled tag in COPR: {:?}", tokenizer.current_token), + } + } + } +} diff --git a/src/types/corporation.rs b/src/types/corporation.rs index 6d339a8..b7b005b 100644 --- a/src/types/corporation.rs +++ b/src/types/corporation.rs @@ -1,4 +1,9 @@ -use crate::types::Address; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::Address, + util::{dbg, take_line_value}, +}; 
#[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -20,3 +25,42 @@ pub struct Corporation { pub website: Option, } + +impl Corporation { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Corporation { + let mut corp = Corporation::default(); + corp.parse(tokenizer, level); + corp + } +} + +impl Parse for Corporation { + /// parse is for a CORP tag within the SOUR tag of a HEADER + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + "PHON" => self.phone = Some(take_line_value(tokenizer)), + "EMAIL" => self.email = Some(take_line_value(tokenizer)), + "FAX" => self.fax = Some(take_line_value(tokenizer)), + "WWW" => self.website = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled CORP tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled CORP tag in header: {:?}", + tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/date.rs b/src/types/date.rs index a4f18ac..9f128e1 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -1,3 +1,9 @@ +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + util::{dbg, take_line_value}, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -25,6 +31,38 @@ impl Date { } } +impl Date { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Date { + let mut date = Date::default(); + date.parse(tokenizer, level); + date + } +} + +impl Parse for Date { + /// parse handles the DATE tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if 
cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "TIME" => self.time = Some(take_line_value(tokenizer)), + _ => panic!("{} unhandled DATE tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unexpected DATE token: {:?}", tokenizer.current_token), + } + } + } +} + /// ChangeDate is intended to only record the last change to a record. Some systems may want to /// manage the change process with more detail, but it is sufficient for GEDCOM purposes to /// indicate the last time that a record was modified. diff --git a/src/types/event.rs b/src/types/event.rs index 549be52..88645b0 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,4 +1,9 @@ -use crate::types::SourceCitation; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::SourceCitation, + util::{dbg, take_line_value}, +}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; use std::{fmt, string::ToString}; @@ -38,13 +43,15 @@ pub struct Event { impl Event { #[must_use] - pub fn new(etype: EventType) -> Event { - Event { - event: etype, + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> Event { + let mut event = Event { + event: Self::from_tag(tag), date: None, place: None, citations: Vec::new(), - } + }; + event.parse(tokenizer, level); + event } /** converts an event to be of type `SourceData` with `value` as the data */ @@ -52,9 +59,8 @@ impl Event { self.event = EventType::SourceData(value); } - #[must_use] - pub fn from_tag(tag: &str) -> Event { - let etype = match tag { + pub fn from_tag(tag: &str) -> EventType { + match tag { "ADOP" => EventType::Adoption, "BIRT" => EventType::Birth, "BURI" => EventType::Burial, @@ -64,8 +70,7 @@ impl Event { "RESI" => EventType::Residence, "OTHER" => EventType::Other, _ => panic!("Unrecognized event tag: {}", tag), - }; - Event::new(etype) + } } pub fn add_citation(&mut self, citation: SourceCitation) { @@ 
-113,3 +118,29 @@ pub trait HasEvents { places } } + +impl Parse for Event { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => self.date = Some(take_line_value(tokenizer)), + "PLAC" => self.place = Some(take_line_value(tokenizer)), + "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Event Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Event Token: {:?}", tokenizer.current_token), + } + } + } +} diff --git a/src/types/family.rs b/src/types/family.rs index c666ac4..1b5881e 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,4 +1,10 @@ -use crate::types::{event::HasEvents, Event}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{event::HasEvents, Event}, + util::{dbg, take_line_value}, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -8,7 +14,7 @@ type Xref = String; /// /// This data representation understands that HUSB & WIFE are just poorly-named /// pointers to individuals. no gender "validating" is done on parse. 
-#[derive(Debug)] +#[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Family { pub xref: Option, @@ -21,15 +27,13 @@ pub struct Family { impl Family { #[must_use] - pub fn new(xref: Option) -> Family { - Family { - xref, - individual1: None, - individual2: None, - children: Vec::new(), - num_children: None, - events: Vec::new(), - } + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Family { + let mut fam = Family::default(); + fam.xref = xref; + fam.children = Vec::new(); + fam.events = Vec::new(); + fam.parse(tokenizer, level); + fam } pub fn set_individual1(&mut self, xref: Xref) { @@ -51,6 +55,34 @@ impl Family { } } +impl Parse for Family { + /// parse handles FAM top-level tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip over FAM tag name + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MARR" => self.add_event(Event::new(tokenizer, level + 1, "MARR")), + "HUSB" => self.set_individual1(take_line_value(tokenizer)), + "WIFE" => self.set_individual2(take_line_value(tokenizer)), + "CHIL" => self.add_child(take_line_value(tokenizer)), + _ => panic!("{} Unhandled Family Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Family Token: {:?}", tokenizer.current_token), + } + } + } +} + impl HasEvents for Family { fn add_event(&mut self, event: Event) -> () { let event_type = &event.event; diff --git a/src/types/header.rs b/src/types/header.rs index 9ba2afa..8a9bd27 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,4 +1,10 @@ -use crate::types::{Copyright, Corporation, Date, Note}; +use crate::util::dbg; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{Copyright, Corporation, Date, Note}, + util::{parse_custom_tag, 
take_line_value}, +}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -10,11 +16,11 @@ use super::CustomData; #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Header { /// tag: GEDC - pub gedcom: Option, + pub gedcom: Option, /// tag: CHAR pub encoding: Option, /// tag: SOUR - pub source: Option, + pub source: Option, /// tag: DEST, an identifier for the system expected to receive this document. /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#DEST pub destination: Option, @@ -42,23 +48,115 @@ pub struct Header { } impl Header { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Header { + let mut header = Header::default(); + header.parse(tokenizer, level); + header + } + pub fn add_custom_data(&mut self, data: CustomData) { self.custom_data.push(data) } } -/// GedcomDocument (tag: GEDC) is a container for information about the entire document. It is +impl Parse for Header { + /// Parses HEAD top-level tag. See + /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // let mut head = Header::default(); + + // skip over HEAD tag name + tokenizer.next_token(); + + while tokenizer.current_token != Token::Level(level) { + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "GEDC" => self.gedcom = Some(GedcomDoc::new(tokenizer, 1)), + "SOUR" => self.source = Some(HeadSour::new(tokenizer, 1)), + "DEST" => self.destination = Some(take_line_value(tokenizer)), + "DATE" => self.date = Some(Date::new(tokenizer, 1)), + "SUBM" => self.submitter_tag = Some(take_line_value(tokenizer)), + "SUBN" => self.submission_tag = Some(take_line_value(tokenizer)), + "FILE" => self.filename = Some(take_line_value(tokenizer)), + "COPR" => self.copyright = Some(Copyright::new(tokenizer, 1)), + "CHAR" => self.encoding = Some(Encoding::new(tokenizer, 1)), + "LANG" => self.language = Some(take_line_value(tokenizer)), + 
"NOTE" => self.note = Some(Note::new(tokenizer, 1)), + "PLAC" => self.place = Some(HeadPlac::new(tokenizer, 1)), + _ => panic!("{} Unhandled Header Tag: {}", dbg(tokenizer), tag), + }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + } + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Header Token: {:?}", &tokenizer.current_token), + } + } + } +} + +/// GedcomDoc (tag: GEDC) is a container for information about the entire document. It is /// recommended that applications write GEDC with its required subrecord VERS as the first /// substructure of a HEAD. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#GEDC #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct GedcomDocument { +pub struct GedcomDoc { /// tag: VERS pub version: Option, /// tag: FORM; see Gedcom 5.5.1 specification, p. 50 pub form: Option, } +impl GedcomDoc { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomDoc { + let mut gedc = GedcomDoc::default(); + gedc.parse(tokenizer, level); + gedc + } +} + +impl Parse for GedcomDoc { + /// parse handles parsing GEDC tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip GEDC tag + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "VERS" => self.version = Some(take_line_value(tokenizer)), + // this is the only value that makes sense. warn them otherwise. + "FORM" => { + let form = take_line_value(tokenizer); + if &form.to_uppercase() != "LINEAGE-LINKED" { + println!( + "WARNING: Unrecognized GEDCOM form. 
Expected LINEAGE-LINKED, found {}" + , form); + } + self.form = Some(form); + } + _ => panic!("{} Unhandled GEDC Tag: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{} Unexpected GEDC Token: {:?}", + dbg(&tokenizer), + &tokenizer.current_token + ), + } + } + } +} + /// Encoding (tag: CHAR) is a code value that represents the character set to be used to /// interpret this data. See Gedcom 5.5.1 specification, p. 44 #[derive(Debug, Default)] @@ -69,13 +167,49 @@ pub struct Encoding { pub version: Option, } +impl Encoding { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Encoding { + let mut chars = Encoding::default(); + chars.parse(tokenizer, level); + chars + } +} + +impl Parse for Encoding { + /// parse handles the parsing of the CHARS tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "VERS" => self.version = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled CHAR Tag: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{} Unexpected CHAR Token: {:?}", + dbg(&tokenizer), + &tokenizer.current_token + ), + } + } + } +} + /// HeadSource (tag: SOUR) is an identifier for the product producing the gedcom data. A /// registration process for these identifiers existed for a time, but no longer does. If an /// existing identifier is known, it should be used. Otherwise, a URI owned by the product should /// be used instead. 
See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR
#[derive(Debug, Default)]
#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
-pub struct HeadSource {
+pub struct HeadSour {
     pub value: Option,
     /// tag: VERS
     pub version: Option,
@@ -87,6 +221,41 @@ pub struct HeadSource {
     pub data: Option,
 }
 
+impl HeadSour {
+    #[must_use]
+    pub fn new(tokenizer: &mut Tokenizer, level: u8) -> HeadSour {
+        let mut head_sour = HeadSour::default();
+        head_sour.parse(tokenizer, level);
+        head_sour
+    }
+}
+
+impl Parse for HeadSour {
+    /// parse handles the SOUR tag in a header
+    fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) {
+        self.value = Some(take_line_value(tokenizer));
+
+        loop {
+            if let Token::Level(cur_level) = tokenizer.current_token {
+                if cur_level <= level {
+                    break;
+                }
+            }
+            match &tokenizer.current_token {
+                Token::Tag(tag) => match tag.as_str() {
+                    "VERS" => self.version = Some(take_line_value(tokenizer)),
+                    "NAME" => self.name = Some(take_line_value(tokenizer)),
+                    "CORP" => self.corporation = Some(Corporation::new(tokenizer, level + 1)),
+                    "DATA" => self.data = Some(HeadSourData::new(tokenizer, level + 1)),
+                    _ => panic!("{} Unhandled SOUR Tag: {}", dbg(tokenizer), tag),
+                },
+                Token::Level(_) => tokenizer.next_token(),
+                _ => panic!("Unexpected SOUR Token: {:?}", tokenizer.current_token),
+            }
+        }
+    }
+}
+
 /// The electronic data source or digital repository from which this dataset was exported. The
 /// payload is the name of that source, with substructures providing additional details about the
 /// source (not the export). 
See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR-DATA @@ -100,6 +269,42 @@ pub struct HeadSourData { pub copyright: Option, } +impl HeadSourData { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> HeadSourData { + let mut head_sour_data = HeadSourData::default(); + head_sour_data.parse(tokenizer, level); + head_sour_data + } +} + +impl Parse for HeadSourData { + /// parse parses the DATA tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "COPR" => self.copyright = Some(Copyright::new(tokenizer, level + 1)), + _ => panic!("{} unhandled DATA tag in header: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled SOUR tag in header: {:?}", + tokenizer.current_token + ), + } + } + } +} + /// HeadPlace (tag: PLAC) is is a placeholder for providing a default PLAC.FORM, and must not have /// a payload. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC #[derive(Debug, Default)] @@ -127,3 +332,48 @@ impl HeadPlac { self.form.remove(index); } } + +impl HeadPlac { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> HeadPlac { + let mut head_plac = HeadPlac::default(); + head_plac.parse(tokenizer, level); + head_plac + } +} + +impl Parse for HeadPlac { + /// parse handles the PLAC tag when present in header + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + + // In the header, PLAC should have no payload. 
See + // https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC + tokenizer.next_token(); + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "FORM" => { + let form = take_line_value(tokenizer); + let jurisdictional_titles = form.split(","); + + for t in jurisdictional_titles { + let v = t.trim(); + self.push_jurisdictional_title(v.to_string()); + } + } + _ => panic!("{} Unhandled PLAC tag in header: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled PLAC tag in header: {:?}", + tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/individual.rs b/src/types/individual.rs index bf44314..6fc4bb5 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,4 +1,10 @@ -use crate::types::{event::HasEvents, CustomData, Event}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{event::HasEvents, CustomData, Event}, + util::{dbg, parse_custom_tag, take_line_value}, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -19,8 +25,8 @@ pub struct Individual { impl Individual { #[must_use] - pub fn new(xref: Option) -> Individual { - Individual { + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Individual { + let mut indi = Individual { xref, name: None, sex: Gender::Unknown, @@ -28,7 +34,9 @@ impl Individual { families: Vec::new(), custom_data: Vec::new(), last_updated: None, - } + }; + indi.parse(tokenizer, level); + indi } pub fn add_family(&mut self, link: FamilyLink) { @@ -58,17 +66,90 @@ impl HasEvents for Individual { } } +impl Parse for Individual { + /// parse handles the INDI top-level tag + fn parse(&mut self, tokenizer: &mut crate::tokenizer::Tokenizer, level: u8) { + // skip over INDI tag name + tokenizer.next_token(); + + while tokenizer.current_token != Token::Level(level) { 
+ match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "NAME" => self.name = Some(Name::new(tokenizer, level + 1)), + "SEX" => self.sex = Gender::new(tokenizer, level + 1), + "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" + | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" + | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { + let tag_clone = tag.clone(); + self.add_event(Event::new(tokenizer, level + 1, tag_clone.as_str())); + } + "FAMC" | "FAMS" => { + let tag_clone = tag.clone(); + self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); + } + "CHAN" => { + // assuming it always only has a single DATE subtag + tokenizer.next_token(); // level + tokenizer.next_token(); // DATE tag + self.last_updated = Some(take_line_value(tokenizer)); + } + _ => panic!("{} Unhandled Individual Tag: {}", dbg(tokenizer), tag), + }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + } + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Individual Token: {:?}", tokenizer.current_token), + } + } + } +} + /// Gender of an `Individual` #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub enum Gender { Male, Female, - // come at me LDS, i support "N" as a gender value Nonbinary, Unknown, } +impl Gender { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Gender { + let mut gender = Gender::Unknown; + gender.parse(tokenizer, level); + gender + } +} + +impl Parse for Gender { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + tokenizer.next_token(); + if let Token::LineValue(gender_string) = &tokenizer.current_token { + *self = match gender_string.as_str() { + "M" => Gender::Male, + "F" => Gender::Female, + "N" => Gender::Nonbinary, + "U" => Gender::Unknown, + _ => panic!( + "{} Unknown gender value {} ({})", + dbg(tokenizer), + 
gender_string, + level + ), + }; + } else { + panic!( + "Expected gender LineValue, found {:?}", + tokenizer.current_token + ); + } + tokenizer.next_token(); + } +} + #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] enum FamilyLinkType { @@ -91,13 +172,16 @@ pub struct FamilyLink(Xref, FamilyLinkType, Option); impl FamilyLink { #[must_use] - pub fn new(xref: Xref, tag: &str) -> FamilyLink { + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> FamilyLink { +let xref = take_line_value(tokenizer); let link_type = match tag { "FAMC" => FamilyLinkType::Child, "FAMS" => FamilyLinkType::Spouse, _ => panic!("Unrecognized family type tag: {}", tag), }; - FamilyLink(xref, link_type, None) + let mut family_link = FamilyLink(xref, link_type, None); + family_link.parse(tokenizer, level); + family_link } pub fn set_pedigree(&mut self, pedigree_text: &str) { @@ -111,6 +195,29 @@ impl FamilyLink { } } +impl Parse for FamilyLink { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "PEDI" => self.set_pedigree(take_line_value(tokenizer).as_str()), + _ => panic!("{} Unhandled FamilyLink Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled FamilyLink Token: {:?}", + tokenizer.current_token + ), + } + } + } +} + #[derive(Debug, Default, PartialEq)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Name { @@ -121,3 +228,37 @@ pub struct Name { pub surname_prefix: Option, pub suffix: Option, } + +impl Name { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Name { + let mut name = Name::default(); + name.parse(tokenizer, level); + name + } +} + +impl Parse for Name { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = 
Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "GIVN" => self.given = Some(take_line_value(tokenizer)), + "NPFX" => self.prefix = Some(take_line_value(tokenizer)), + "NSFX" => self.suffix = Some(take_line_value(tokenizer)), + "SPFX" => self.surname_prefix = Some(take_line_value(tokenizer)), + "SURN" => self.surname = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled Name Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Name Token: {:?}", tokenizer.current_token), + } + } + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs index 5e79810..f21511e 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -11,7 +11,7 @@ pub mod event; pub use event::{Event, EventType}; pub mod date; -pub use date::{Date, ChangeDate}; +pub use date::{ChangeDate, Date}; mod address; pub use address::*; @@ -40,6 +40,9 @@ pub use note::*; mod translation; pub use translation::*; +mod repository; +pub use repository::*; + mod copyright; pub use copyright::*; @@ -52,38 +55,6 @@ pub use corporation::*; #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Media {} -/// Data repository, the `REPO` tag -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct Repository { - /// Optional reference to link to this repo - pub xref: Option, - /// Name of the repository - pub name: Option, - /// Physical address of the data repository - pub address: Option
, -} - -/// Citation linking a genealogy fact to a data `Source` -#[derive(Clone, Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct SourceCitation { - /// Reference to the `Source` - pub xref: Xref, - /// Page number of source - pub page: Option, -} - -/// Citation linking a `Source` to a data `Repository` -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct RepoCitation { - /// Reference to the `Repository` - pub xref: Xref, - /// Call number to find the source at this repository - pub call_number: Option, -} - #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct CustomData { diff --git a/src/types/note.rs b/src/types/note.rs index 01bd550..cf085cb 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -1,4 +1,11 @@ -use crate::types::{Translation, Source}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{Source, Translation}, + util::dbg, + util::take_line_value, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -30,3 +37,47 @@ pub struct Note { /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#LANG pub language: Option, } + +impl Note { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Note { + let mut note = Note::default(); + note.parse(tokenizer, level); + note + } +} + +impl Parse for Note { + /// parse handles the NOTE tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + let mut value = String::new(); + + value.push_str(&take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MIME" => self.mime = Some(take_line_value(tokenizer)), + "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), + "LANG" => self.language = Some(take_line_value(tokenizer)), + "CONT" | "CONC" => { 
+ value.push('\n'); + value.push_str(&take_line_value(tokenizer)); + } + _ => panic!("{} unhandled NOTE tag: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), + } + } + if value != "" { + self.value = Some(value); + } + } +} diff --git a/src/types/repository.rs b/src/types/repository.rs new file mode 100644 index 0000000..ad3b8d5 --- /dev/null +++ b/src/types/repository.rs @@ -0,0 +1,101 @@ +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + util::{dbg, take_line_value}, +}; + +use super::{Address, Xref}; + +/// Data repository, the `REPO` tag +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Repository { + /// Optional reference to link to this repo + pub xref: Option, + /// Name of the repository + pub name: Option, + /// Physical address of the data repository + pub address: Option
, +} + +impl Repository { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Repository { + let mut repo = Repository { + xref, + name: None, + address: None, + }; + repo.parse(tokenizer, level); + repo + } +} + +impl Parse for Repository { + /// Parses REPO top-level tag. + fn parse(&mut self, tokenizer: &mut crate::tokenizer::Tokenizer, level: u8) { + // skip REPO tag + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "NAME" => self.name = Some(take_line_value(tokenizer)), + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Repository Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Repository Token: {:?}", tokenizer.current_token), + } + } + } +} + +/// Citation linking a `Source` to a data `Repository` +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct RepoCitation { + /// Reference to the `Repository` + pub xref: Xref, + /// Call number to find the source at this repository + pub call_number: Option, +} + +impl RepoCitation { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> RepoCitation { + let mut rc = RepoCitation { + xref: take_line_value(tokenizer), + call_number: None, + }; + rc.parse(tokenizer, level); + rc + } +} + +impl Parse for RepoCitation { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CALN" => self.call_number = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled RepoCitation Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled 
RepoCitation Token: {:?}", + tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/source.rs b/src/types/source.rs index cfce2fd..739be0e 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,7 +1,15 @@ -use crate::types::{Event, RepoCitation}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{Event, RepoCitation}, + util::{dbg, take_line_value, take_continued_text}, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +use super::Xref; + #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] /// Source for genealogy facts @@ -15,8 +23,8 @@ pub struct Source { impl Source { #[must_use] - pub fn new(xref: Option) -> Source { - Source { + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Source { + let mut sour = Source { xref, data: SourceData { events: Vec::new(), @@ -25,7 +33,9 @@ impl Source { abbreviation: None, title: None, repo_citations: Vec::new(), - } + }; + sour.parse(tokenizer, level); + sour } pub fn add_repo_citation(&mut self, citation: RepoCitation) { @@ -33,6 +43,39 @@ impl Source { } } +impl Parse for Source { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip SOUR tag + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATA" => tokenizer.next_token(), + "EVEN" => { + let events_recorded = take_line_value(tokenizer); + let mut event = Event::new(tokenizer, level + 2, "OTHER"); + event.with_source_data(events_recorded); + self.data.add_event(event); + } + "AGNC" => self.data.agency = Some(take_line_value(tokenizer)), + "ABBR" => self.abbreviation = Some(take_continued_text(tokenizer, level + 1)), + "TITL" => self.title = Some(take_continued_text(tokenizer, level + 1)), + "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), + _ => panic!("{} 
Unhandled Source Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Source Token: {:?}", tokenizer.current_token), + } + } + } +} + #[allow(clippy::module_name_repetitions)] #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -46,3 +89,45 @@ impl SourceData { self.events.push(event); } } + +/// Citation linking a genealogy fact to a data `Source` +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct SourceCitation { + /// Reference to the `Source` + pub xref: Xref, + /// Page number of source + pub page: Option, +} + +impl SourceCitation { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> SourceCitation { + let mut citation = SourceCitation { + xref: take_line_value(tokenizer), + page: None, + }; + citation.parse(tokenizer, level); + citation + } +} + +impl Parse for SourceCitation { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "PAGE" => self.page = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled Citation Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Citation Token: {:?}", tokenizer.current_token), + } + } + } +} diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 0af8d69..35f08d6 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,11 +1,17 @@ -use crate::types::Address; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::Address, + util::{dbg, take_line_value}, +}; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; type Xref = String; /// Submitter of the data, ie. 
who reported the genealogy fact -#[derive(Debug)] +#[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Submitter { /// Optional reference to link to this submitter @@ -23,15 +29,35 @@ pub struct Submitter { impl Submitter { /// Shorthand for creating a `Submitter` from its `xref` #[must_use] - pub fn new(xref: Option) -> Submitter { - Submitter { - xref, - name: None, - address: None, - phone: None, - language: None, - } + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Submitter { + let mut subm = Submitter::default(); + subm.xref = xref; + subm.parse(tokenizer, level); + subm } } +impl Parse for Submitter { + /// Parse handles SUBM top-level tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + + // skip over SUBM tag name + tokenizer.next_token(); + while tokenizer.current_token != Token::Level(level) { + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "NAME" => self.name = Some(take_line_value(tokenizer)), + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + "PHON" => self.phone = Some(take_line_value(tokenizer)), + "LANG" => self.language = Some(take_line_value(tokenizer)), + // TODO + // "CHAN" => submitter.change_date = Some(take_line_value(&mut self.tokenizer)), + _ => panic!("{} Unhandled Submitter Tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unhandled Submitter Token: {:?}", tokenizer.current_token), + } + } + } +} diff --git a/src/types/translation.rs b/src/types/translation.rs index bb1adee..f6fc188 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -1,6 +1,13 @@ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + util::dbg, + util::take_line_value, +}; + /// Translation (tag:TRAN) is a type of TRAN for unstructured human-readable text, such as /// is found in NOTE and SNOTE 
payloads. Each NOTE-TRAN must have either a LANG substructure or a /// MIME substructure or both. If either is missing, it is assumed to have the same value as the @@ -14,3 +21,39 @@ pub struct Translation { /// tag:LANG pub language: Option, } + +impl Translation { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Translation { + let mut tran = Translation::default(); + tran.parse(tokenizer, level); + tran + } +} + +impl Parse for Translation { + + ///parse handles the TRAN tag + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "MIME" => self.mime = Some(take_line_value(tokenizer)), + "LANG" => self.language = Some(take_line_value(tokenizer)), + _ => panic!("{} unhandled NOTE tag: {}", dbg(&tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), + } + } + } +} diff --git a/src/util.rs b/src/util.rs index 65eb740..b3308b0 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,3 +1,8 @@ +use crate::{ + tokenizer::{Token, Tokenizer}, + types::CustomData, +}; + /// Macro for displaying `Option`s in debug mode without the text wrapping. #[macro_export] macro_rules! fmt_optional_value { @@ -9,3 +14,68 @@ macro_rules! fmt_optional_value { } }; } + +/// Debug function displaying GEDCOM line number of error message. 
+pub fn dbg(tokenizer: &Tokenizer) -> String { + format!("line {}:", tokenizer.line) +} + +/// Grabs and returns to the end of the current line as a String +pub fn take_line_value(tokenizer: &mut Tokenizer) -> String { + let value: String; + tokenizer.next_token(); + + if let Token::LineValue(val) = &tokenizer.current_token { + value = val.to_string(); + } else { + panic!( + "{} Expected LineValue, found {:?}", + dbg(&tokenizer), + tokenizer.current_token + ); + } + tokenizer.next_token(); + value +} + +pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> CustomData { + let value = take_line_value(tokenizer); + CustomData { tag, value } +} + +/// Takes the value of the current line including handling +/// multi-line values from CONT & CONC tags. +pub fn take_continued_text(tokenizer: &mut Tokenizer, level: u8) -> String { + let mut value = take_line_value(tokenizer); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" => { + value.push('\n'); + value.push_str(&take_line_value(tokenizer)) + } + "CONC" => { + value.push(' '); + value.push_str(&take_line_value(tokenizer)) + } + _ => panic!( + "{} Unhandled Continuation Tag: {}", + dbg(tokenizer), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled Continuation Token: {:?}", + tokenizer.current_token + ), + } + } + value +} diff --git a/tests/header.rs b/tests/header.rs index 2bd2cd6..cbfa8bf 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -14,7 +14,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let head_gedc = data.header.gedcom.unwrap(); + let head_gedc = data.header.unwrap().gedcom.unwrap(); assert_eq!(head_gedc.version.unwrap(), "5.5"); assert_eq!(head_gedc.form.unwrap(), "LINEAGE-LINKED"); } @@ -48,7 +48,7 @@ mod tests { let mut parser = 
Parser::new(sample.chars()); let data = parser.parse_record(); - let sour = data.header.source.unwrap(); + let sour = data.header.unwrap().source.unwrap(); assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); let vers = sour.version.unwrap(); @@ -98,7 +98,7 @@ mod tests { let data = parser.parse_record(); assert_eq!( - data.header.destination.unwrap(), + data.header.unwrap().destination.unwrap(), "Destination of transmission" ); } @@ -116,7 +116,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_date = data.header.date.unwrap(); + let h_date = data.header.unwrap().date.unwrap(); assert_eq!(h_date.value.unwrap(), "1 JAN 1998"); assert_eq!(h_date.time.unwrap(), "13:57:24.80"); } @@ -135,7 +135,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_subm = data.header.submitter_tag.unwrap(); + let h_subm = data.header.unwrap().submitter_tag.unwrap(); assert_eq!(h_subm.as_str(), "@SUBMITTER@"); } @@ -153,7 +153,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_subn = data.header.submission_tag.unwrap(); + let h_subn = data.header.unwrap().submission_tag.unwrap(); assert_eq!(h_subn.as_str(), "@SUBMISSION@"); } @@ -171,7 +171,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_file = data.header.filename.unwrap(); + let h_file = data.header.unwrap().filename.unwrap(); assert_eq!(h_file.as_str(), "ALLGED.GED"); } @@ -188,7 +188,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_copr = data.header.copyright.unwrap(); + let h_copr = data.header.unwrap().copyright.unwrap(); assert_eq!(h_copr.value.unwrap(), "(C) 1997-2000 by H. 
Eichmann."); assert_eq!( h_copr.continued.unwrap(), @@ -209,7 +209,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_char = data.header.encoding.unwrap(); + let h_char = data.header.unwrap().encoding.unwrap(); assert_eq!(h_char.value.unwrap(), "ASCII"); assert_eq!( h_char.version.unwrap(), @@ -229,7 +229,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_lang = data.header.language.unwrap(); + let h_lang = data.header.unwrap().language.unwrap(); assert_eq!(h_lang.as_str(), "language"); } @@ -246,7 +246,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_plac = data.header.place.unwrap(); + let h_plac = data.header.unwrap().place.unwrap(); assert_eq!(h_plac.form[0], "City"); assert_eq!(h_plac.form[1], "County"); assert_eq!(h_plac.form[2], "State"); @@ -281,7 +281,7 @@ mod tests { let mut parser = Parser::new(sample.chars()); let data = parser.parse_record(); - let h_note = data.header.note.unwrap(); + let h_note = data.header.unwrap().note.unwrap(); assert_eq!(h_note.value.unwrap().chars().count(), 1441); } } diff --git a/tests/lib.rs b/tests/lib.rs index 2127ea0..4eba85e 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -25,13 +25,15 @@ mod tests { assert_eq!(data.families.len(), 1); assert_eq!(data.submitters.len(), 1); + let header = data.header.unwrap(); + // header assert_eq!( - data.header.encoding.unwrap().value.unwrap().as_str(), + header.encoding.unwrap().value.unwrap().as_str(), "ASCII" ); - assert_eq!(data.header.submitter_tag.unwrap().as_str(), "@SUBMITTER@"); - assert_eq!(data.header.gedcom.unwrap().version.unwrap(), "5.5"); + assert_eq!(header.submitter_tag.unwrap().as_str(), "@SUBMITTER@"); + assert_eq!(header.gedcom.unwrap().version.unwrap(), "5.5"); // names assert_eq!( From 4afc2b1e81bb3ce581c25c62d17b8cc2c71b678c Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 7 Nov 2022 22:32:42 -0600 
Subject: [PATCH 24/55] Resolve conflicts after merge with parse-trait --- src/bin.rs | 4 +- src/lib.rs | 4 +- src/tree.rs | 34 +---------------- src/types/family_link.rs | 80 ---------------------------------------- src/types/mod.rs | 3 -- 5 files changed, 6 insertions(+), 119 deletions(-) delete mode 100644 src/types/family_link.rs diff --git a/src/bin.rs b/src/bin.rs index a8797bd..f9c59a6 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,5 +1,5 @@ use gedcom::parser::Parser; -use gedcom::Gedcom; +use gedcom::GedcomData; use std::env; use std::fs; use std::path::PathBuf; @@ -18,7 +18,7 @@ fn main() { usage(""); } - let data: Gedcom; + let data: GedcomData; if let Ok(contents) = read_relative(filename) { let mut parser = Parser::new(contents.chars()); diff --git a/src/lib.rs b/src/lib.rs index cc9ec86..7adf1e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,11 +27,11 @@ pub mod tokenizer; pub mod types; mod tree; -pub use tree::Gedcom; +pub use tree::GedcomData; #[must_use] /// Helper function for converting GEDCOM file content stream to parsed data. -pub fn parse(content: std::str::Chars) -> Gedcom { +pub fn parse(content: std::str::Chars) -> GedcomData { let mut p = parser::Parser::new(content); p.parse_record() } diff --git a/src/tree.rs b/src/tree.rs index 282e1fd..3d0f82f 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] /// The data structure representing all the data within a gedcom file -pub struct Gedcom { +pub struct GedcomData { /// Header containing file metadata pub header: Option
, /// List of submitters of the facts @@ -23,20 +23,7 @@ pub struct Gedcom { } // should maybe store these by xref if available? -impl Gedcom { - // pub(crate) fn add(&mut self, data: &Box) { - // match data.get_type() { - // GedcomDataType::Family(family) => self.families.push(family), - // GedcomDataType::Header(header) => self.header = header, - // GedcomDataType::Individual(person) => self.individuals.push(person), - // GedcomDataType::Media(media) => self.multimedia.push(media), - // GedcomDataType::Repository(repo) => self.repositories.push(repo), - // GedcomDataType::Source(source) => self.sources.push(source), - // GedcomDataType::Submitter(submitter) => self.submitters.push(submitter), - // GedcomDataType::Other(s) => println!("Unhandled datatype: {}", s), - // } - // } - +impl GedcomData { /// Adds a `Family` (a relationship between individuals) to the tree pub fn add_family(&mut self, family: Family) { self.families.push(family); @@ -76,20 +63,3 @@ impl Gedcom { println!("----------------------"); } } - -// /// Type of data that can be added to a Gedcom tree. 
-// #[derive(Debug)] -// pub(crate) enum GedcomDataType { -// Family(Family), -// Header(Header), -// Individual(Individual), -// Media(Media), -// Repository(Repository), -// Source(Source), -// Submitter(Submitter), -// Other(String), -// } - -// pub(crate) trait GedcomData { -// fn get_type(&self) -> GedcomDataType; -// } diff --git a/src/types/family_link.rs b/src/types/family_link.rs deleted file mode 100644 index 3b793c6..0000000 --- a/src/types/family_link.rs +++ /dev/null @@ -1,80 +0,0 @@ -use crate::parser::{Parsable, Parser, ParsingError}; -use crate::tokenizer::Token; - -#[cfg(feature = "json")] -use serde::{Deserialize, Serialize}; - -type Xref = String; - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct FamilyLink(pub Xref, pub Relation, pub Option); - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub enum Relation { - Spouse, - Child, -} - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub enum Pedigree { - Adopted, - Birth, - Foster, - Sealing, -} - -impl FamilyLink { - #[must_use] - pub fn new(xref: Xref, tag: &str) -> FamilyLink { - let link_type = match tag { - "FAMC" => Relation::Child, - "FAMS" => Relation::Spouse, - _ => panic!("Unrecognized family type tag: {}", tag), - }; - FamilyLink(xref, link_type, None) - } - - pub fn set_pedigree(&mut self, pedigree_text: &str) { - self.2 = match pedigree_text.to_lowercase().as_str() { - "adopted" => Some(Pedigree::Adopted), - "birth" => Some(Pedigree::Birth), - "foster" => Some(Pedigree::Foster), - "sealing" => Some(Pedigree::Sealing), - _ => panic!("Unrecognized family link pedigree: {}", pedigree_text), - }; - } -} - -impl Parsable for FamilyLink { - fn parse(parser: &mut Parser) -> Result { - let base_lvl = parser.level; - let tag = parser.take_tag(); - let relation = match tag { - "FAMC" => Relation::Child, - "FAMS" => Relation::Spouse, - _ => panic!("Unrecognized family type tag: {}", 
tag), - }; - let mut link = FamilyLink(parser.take_line_value(), relation, None); - - loop { - if let Token::Level(cur_level) = parser.tokenizer.current_token { - if cur_level <= base_lvl { - break; - } - } - match &parser.tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PEDI" => link.set_pedigree(parser.take_line_value().as_str()), - _ => parser.skip_current_tag("FamilyLink"), - }, - Token::Level(_) => parser.set_level(), - _ => parser.handle_unexpected_token("FamilyLink"), - } - } - - Ok(link) - } -} diff --git a/src/types/mod.rs b/src/types/mod.rs index 02c963d..f21511e 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -28,9 +28,6 @@ pub use individual::*; mod family; pub use family::*; -mod family_link; -pub use family_link::FamilyLink; - mod submitter; pub use submitter::*; From d3dab7c3cc31f3cc2a5a09d865e9c4a11cbd25a3 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Tue, 8 Nov 2022 17:11:52 -0600 Subject: [PATCH 25/55] Fix warning in tests/json_feature.rs --- README.md | 71 +++++++++++++++++++++++++++++++++++++++++++ tests/json_feature.rs | 3 -- 2 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..21ae083 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# rust-gedcom + + + + + + + + +> A gedcom parser written in rust 🦀 + +## About this project + +GEDCOM is a file format for sharing genealogical information like family trees. + +I wanted experience playing with parsers and representing tree structures in Rust, and noticed a parser for Rust did not exist. And thus, this project was born! A fun experiment to practice my Rust abilities. + +It hopes to be ~~fully~~ mostly compliant with the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). 
+ +I have found this [5.5.2 specification](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) useful in its assessment of which tags are worth supporting or not. + +## Usage + +This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for my testing & development. It prints the `GedcomData` object and some stats about the gedcom file passed into it: +```bash +parse_gedcom ./tests/fixtures/sample.ged + +# outputs tree data here w/ stats +# ---------------------- +# | Gedcom Data Stats: | +# ---------------------- +# submitters: 1 +# individuals: 3 +# families: 2 +# repositories: 1 +# sources: 1 +# multimedia: 0 +# ---------------------- +``` + +The second is a library containing the parser. + +## JSON Serializing/Deserializing with `serde` +This crate has an optional feature called `json` that implements `Serialize` & `Deserialize` for the gedcom data structure. This allows you to easily integrate with the web. + +For more info about serde, [check them out](https://serde.rs/)! + +The feature is not enabled by default. There are zero dependencies if just using the gedcom parsing functionality. + +Use the json feature with any version >=0.2.1 by adding the following to your Cargo.toml: +```toml +gedcom = { version = "", features = ["json"] } +``` + +## 🚧 Progress 🚧 + +There are still parts of the specification not yet implemented and the project is subject to change. The way I have been developing is to take a gedcom file, attempt to parse it and act on whatever errors or omissions occur. In it's current state, it is capable of parsing the [sample.ged](tests/fixtures/sample.ged) in its entirety. + +Here are some notes about parsed data & tags. Page references are to the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). + +### Top-level tags + +* `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. 
+* `MULTIMEDIA_RECORD` - p.26 - Multimedia (`OBJE`) is not currently parsed. +* `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. (except in header) + +Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. + +## License + +© 2021, [Robert Pirtle](https://robert.pirtle.xyz/). licensed under [MIT](license.md). diff --git a/tests/json_feature.rs b/tests/json_feature.rs index 19b7ad8..9d9fc29 100644 --- a/tests/json_feature.rs +++ b/tests/json_feature.rs @@ -1,6 +1,3 @@ -#[cfg(test)] -mod lib; - #[cfg(test)] #[cfg(feature = "json")] mod json_feature_tests { From 4be608ce6043a032af0b1cd4397d3d47b2c53356 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Tue, 15 Nov 2022 19:46:29 -0600 Subject: [PATCH 26/55] Handle multimedia_record --- src/parser.rs | 65 +--------- src/tree.rs | 85 ++++++++++++- src/types/address.rs | 11 +- src/types/custom.rs | 6 + src/types/date.rs | 54 ++++++-- src/types/individual.rs | 49 ++++++-- src/types/mod.rs | 17 +-- src/types/multimedia.rs | 245 ++++++++++++++++++++++++++++++++++++ src/types/note.rs | 3 +- src/types/source.rs | 16 ++- tests/fixtures/long-url.ged | 8 ++ tests/header.rs | 2 +- tests/lib.rs | 2 + tests/multimedia.rs | 117 +++++++++++++++++ 14 files changed, 572 insertions(+), 108 deletions(-) create mode 100644 src/types/custom.rs create mode 100644 src/types/multimedia.rs create mode 100644 tests/fixtures/long-url.ged create mode 100644 tests/multimedia.rs diff --git a/src/parser.rs b/src/parser.rs index d4ed1af..f5ea585 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,7 @@ //! 
The state machine that parses a char iterator of the gedcom's contents -use std::{panic, str::Chars}; - -use crate::tokenizer::{Token, Tokenizer}; +use std::str::Chars; +use crate::tokenizer::Tokenizer; use crate::tree::GedcomData; -use crate::types::{Family, Header, Individual, Repository, Source, Submitter}; -use crate::util::{dbg, parse_custom_tag}; /// Parse converts a subset of a token list into a type's data structure. pub trait Parse { @@ -28,62 +25,6 @@ impl<'a> Parser<'a> { /// Does the actual parsing of the record. pub fn parse_record(&mut self) -> GedcomData { - let mut data = GedcomData::default(); - loop { - let level = match self.tokenizer.current_token { - Token::Level(n) => n, - _ => panic!( - "{} Expected Level, found {:?}", - dbg(&self.tokenizer), - self.tokenizer.current_token - ), - }; - - self.tokenizer.next_token(); - - let mut pointer: Option = None; - if let Token::Pointer(xref) = &self.tokenizer.current_token { - pointer = Some(xref.to_string()); - self.tokenizer.next_token(); - } - - if let Token::Tag(tag) = &self.tokenizer.current_token { - match tag.as_str() { - "HEAD" => data.header = Some(Header::new(&mut self.tokenizer, 0)), - "FAM" => data.add_family(Family::new(&mut self.tokenizer, 0, pointer)), - "INDI" => { - data.add_individual(Individual::new(&mut self.tokenizer, level, pointer)) - } - "REPO" => data.add_repository(Repository::new(&mut self.tokenizer, level, pointer)), - "SOUR" => data.add_source(Source::new(&mut self.tokenizer, level, pointer)), - "SUBM" => data.add_submitter(Submitter::new(&mut self.tokenizer, 0, pointer)), - "TRLR" => break, - _ => { - println!("{} Unhandled tag {}", dbg(&self.tokenizer), tag); - self.tokenizer.next_token(); - } - }; - } else if let Token::CustomTag(tag) = &self.tokenizer.current_token { - // TODO - let tag_clone = tag.clone(); - let custom_data = parse_custom_tag(&mut self.tokenizer, tag_clone); - println!( - "{} Skipping top-level custom tag: {:?}", - dbg(&self.tokenizer), - custom_data - 
); - while self.tokenizer.current_token != Token::Level(0) { - self.tokenizer.next_token(); - } - } else { - println!( - "{} Unhandled token {:?}", - dbg(&self.tokenizer), - self.tokenizer.current_token - ); - self.tokenizer.next_token(); - }; - } - data + GedcomData::new(&mut self.tokenizer, 0) } } diff --git a/src/tree.rs b/src/tree.rs index 3d0f82f..ff63dbc 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,4 +1,9 @@ -use crate::types::{Family, Header, Individual, Media, Repository, Source, Submitter}; +use crate::{ + parser::Parse, + tokenizer::{Tokenizer, Token}, + types::{Family, Header, Individual, Multimedia, Repository, Source, Submitter}, + util::{dbg, parse_custom_tag}, +}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -19,11 +24,19 @@ pub struct GedcomData { /// Sources of facts. _ie._ book, document, census, etc. pub sources: Vec, /// A multimedia asset linked to a fact - pub multimedia: Vec, + pub multimedia: Vec, } // should maybe store these by xref if available? impl GedcomData { + /// contructor for GedcomData + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomData { + let mut data = GedcomData::default(); + data.parse(tokenizer, level); + data + } + /// Adds a `Family` (a relationship between individuals) to the tree pub fn add_family(&mut self, family: Family) { self.families.push(family); @@ -49,6 +62,11 @@ impl GedcomData { self.submitters.push(submitter); } + /// Adds a `Multimedia` to the tree + pub fn add_multimedia(&mut self, multimedia: Multimedia) { + self.multimedia.push(multimedia); + } + /// Outputs a summary of data contained in the tree to stdout pub fn stats(&self) { println!("----------------------"); @@ -63,3 +81,66 @@ impl GedcomData { println!("----------------------"); } } + +impl Parse for GedcomData { + /// Does the actual parsing of the record. + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + loop { + // TODO is this necessary? 
+ let current_level = match tokenizer.current_token { + Token::Level(n) => n, + _ => panic!( + "{} Expected Level, found {:?}", + dbg(tokenizer), + tokenizer.current_token + ), + }; + + tokenizer.next_token(); + + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + + if let Token::Tag(tag) = &tokenizer.current_token { + match tag.as_str() { + "HEAD" => self.header = Some(Header::new(tokenizer, level)), + "FAM" => self.add_family(Family::new(tokenizer, level, pointer)), + "INDI" => { + self.add_individual(Individual::new(tokenizer, current_level, pointer)) + } + "REPO" => self.add_repository(Repository::new(tokenizer, current_level, pointer)), + "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)), + "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)), + "OBJE" => self.add_multimedia(Multimedia::new(tokenizer, level, pointer)), + "TRLR" => break, + _ => { + println!("{} Unhandled tag {}", dbg(tokenizer), tag); + tokenizer.next_token(); + } + }; + } else if let Token::CustomTag(tag) = &tokenizer.current_token { + // TODO + let tag_clone = tag.clone(); + let custom_data = parse_custom_tag(tokenizer, tag_clone); + println!( + "{} Skipping top-level custom tag: {:?}", + dbg(tokenizer), + custom_data + ); + while tokenizer.current_token != Token::Level(level) { + tokenizer.next_token(); + } + } else { + println!( + "{} Unhandled token {:?}", + dbg(tokenizer), + tokenizer.current_token + ); + tokenizer.next_token(); + }; + } + } +} diff --git a/src/types/address.rs b/src/types/address.rs index 5d4fe1e..c78a9c8 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -23,17 +23,17 @@ pub struct Address { } impl Address { - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Address { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Address { let mut addr = Address::default(); addr.parse(tokenizer, 
level); addr - } + } } impl Parse for Address { /// parse handles ADDR tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - // skip ADDR tag tokenizer.next_token(); @@ -67,10 +67,7 @@ impl Parse for Address { _ => panic!("{} Unhandled Address Tag: {}", dbg(tokenizer), tag), }, Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled Address Token: {:?}", - tokenizer.current_token - ), + _ => panic!("Unhandled Address Token: {:?}", tokenizer.current_token), } } diff --git a/src/types/custom.rs b/src/types/custom.rs new file mode 100644 index 0000000..d640dfd --- /dev/null +++ b/src/types/custom.rs @@ -0,0 +1,6 @@ +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct CustomData { + pub tag: String, + pub value: String, +} diff --git a/src/types/date.rs b/src/types/date.rs index 9f128e1..5b56e5f 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -7,6 +7,8 @@ use crate::{ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; +use super::Note; + /// TODO Date should encompasses a number of date formats, e.g. approximated, period, phrase and range. #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -16,6 +18,13 @@ pub struct Date { } impl Date { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Date { + let mut date = Date::default(); + date.parse(tokenizer, level); + date + } + /// datetime returns Date and Date.time in a single string. 
pub fn datetime(&self) -> Option { match &self.time { @@ -31,15 +40,6 @@ impl Date { } } -impl Date { - #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Date { - let mut date = Date::default(); - date.parse(tokenizer, level); - date - } -} - impl Parse for Date { /// parse handles the DATE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { @@ -69,6 +69,40 @@ impl Parse for Date { #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct ChangeDate { + pub value: Option, pub date: Option, - pub note: Option, + pub note: Option, +} + +impl ChangeDate { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> ChangeDate { + let mut date = ChangeDate::default(); + date.parse(tokenizer, level); + date + } +} + +impl Parse for ChangeDate { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + _ => panic!("{} unhandled ChangeDate tag: {}", dbg(tokenizer), tag), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!("Unexpected ChangeDate token: {:?}", tokenizer.current_token), + } + } + } + } } diff --git a/src/types/individual.rs b/src/types/individual.rs index 6fc4bb5..1acb25a 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,7 @@ use crate::{ parser::Parse, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, CustomData, Event}, + types::{event::HasEvents, CustomData, Event, Multimedia, SourceCitation}, util::{dbg, parse_custom_tag, take_line_value}, }; @@ -20,6 +20,8 @@ pub struct Individual { pub families: Vec, pub custom_data: Vec, pub last_updated: Option, + pub source: 
Vec, + pub multimedia: Vec, events: Vec, } @@ -34,6 +36,8 @@ impl Individual { families: Vec::new(), custom_data: Vec::new(), last_updated: None, + source: Vec::new(), + multimedia: Vec::new(), }; indi.parse(tokenizer, level); indi @@ -55,6 +59,14 @@ impl Individual { pub fn add_custom_data(&mut self, data: CustomData) { self.custom_data.push(data) } + + pub fn add_source_citation(&mut self, sour: SourceCitation) { + self.source.push(sour); + } + + pub fn add_multimedia(&mut self, multimedia: Multimedia) { + self.multimedia.push(multimedia); + } } impl HasEvents for Individual { @@ -79,13 +91,14 @@ impl Parse for Individual { "SEX" => self.sex = Gender::new(tokenizer, level + 1), "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" - | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" => { + | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" + | "MARR" => { let tag_clone = tag.clone(); self.add_event(Event::new(tokenizer, level + 1, tag_clone.as_str())); } "FAMC" | "FAMS" => { let tag_clone = tag.clone(); - self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); + self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); } "CHAN" => { // assuming it always only has a single DATE subtag @@ -93,6 +106,11 @@ impl Parse for Individual { tokenizer.next_token(); // DATE tag self.last_updated = Some(take_line_value(tokenizer)); } + "SOUR" => { + self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); + } + // TODO handle xref + "OBJE" => self.add_multimedia(Multimedia::new(tokenizer, level + 1, None)), _ => panic!("{} Unhandled Individual Tag: {}", dbg(tokenizer), tag), }, Token::CustomTag(tag) => { @@ -173,7 +191,7 @@ pub struct FamilyLink(Xref, FamilyLinkType, Option); impl FamilyLink { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> FamilyLink { -let xref = 
take_line_value(tokenizer); + let xref = take_line_value(tokenizer); let link_type = match tag { "FAMC" => FamilyLinkType::Child, "FAMS" => FamilyLinkType::Spouse, @@ -209,16 +227,13 @@ impl Parse for FamilyLink { _ => panic!("{} Unhandled FamilyLink Tag: {}", dbg(tokenizer), tag), }, Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled FamilyLink Token: {:?}", - tokenizer.current_token - ), + _ => panic!("Unhandled FamilyLink Token: {:?}", tokenizer.current_token), } } } } -#[derive(Debug, Default, PartialEq)] +#[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Name { pub value: Option, @@ -227,14 +242,27 @@ pub struct Name { pub prefix: Option, pub surname_prefix: Option, pub suffix: Option, + pub source: Vec, } impl Name { pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Name { - let mut name = Name::default(); + let mut name = Name { + value: None, + given: None, + surname: None, + prefix: None, + surname_prefix: None, + suffix: None, + source: Vec::new(), + }; name.parse(tokenizer, level); name } + + pub fn add_source_citation(&mut self, sour: SourceCitation) { + self.source.push(sour); + } } impl Parse for Name { @@ -254,6 +282,7 @@ impl Parse for Name { "NSFX" => self.suffix = Some(take_line_value(tokenizer)), "SPFX" => self.surname_prefix = Some(take_line_value(tokenizer)), "SURN" => self.surname = Some(take_line_value(tokenizer)), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Name Tag: {}", dbg(tokenizer), tag), }, Token::Level(_) => tokenizer.next_token(), diff --git a/src/types/mod.rs b/src/types/mod.rs index f21511e..7524546 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -49,15 +49,8 @@ pub use copyright::*; mod corporation; pub use corporation::*; -// TODO -/// Multimedia item -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct Media {} - -#[derive(Debug)] -#[cfg_attr(feature = 
"json", derive(Serialize, Deserialize))] -pub struct CustomData { - pub tag: String, - pub value: String, -} +mod multimedia; +pub use multimedia::*; + +mod custom; +pub use custom::*; diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs new file mode 100644 index 0000000..2c7ce0b --- /dev/null +++ b/src/types/multimedia.rs @@ -0,0 +1,245 @@ +use crate::{ + parser::Parse, + tokenizer::{Token, Tokenizer}, + types::{SourceCitation, Note, Xref}, + util::{dbg, take_line_value}, +}; + +use super::ChangeDate; + +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] + +/// The multimedia record refers to 1 or more external digital files, and may provide some +/// additional information about the files and the media they encode. +/// +/// The file reference can occur more than once to group multiple files together. Grouped files +/// should each pertain to the same context. For example, a sound clip and a photo both of the same +/// event might be grouped in a single OBJE. +/// +/// The change and creation dates should be for the OBJE record itself, not the underlying files. +/// +/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#MULTIMEDIA_RECORD. +pub struct Multimedia { + /// Optional reference to link to this submitter + pub xref: Option, + pub file: Option, + /// The 5.5 spec, page 26, shows FILE as a sub-structure of FILE, but the struct appears as a + /// sibling in an Ancestry.com export. + pub form: Option, + /// The 5.5 spec, page 26, shows TITL as a sub-structure of FILE, but the struct appears as a + /// sibling in an Ancestry.com export. 
+ pub title: Option, + pub user_reference_number: Option, + pub automated_record_id: Option, + pub source_citation: Option, + pub change_date: Option, + pub note_structure: Option, +} + +impl Multimedia { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Multimedia { + let mut obje = Multimedia{ + xref, + file: None, + form: None, + title: None, + user_reference_number: None, + automated_record_id: None, + source_citation: None, + change_date: None, + note_structure: None, + }; + obje.parse(tokenizer, level); + obje + } +} + +impl Parse for Multimedia { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip current line + tokenizer.next_token(); + loop { + if let Token::Level(curl_level) = tokenizer.current_token { + if curl_level <= level { + break; + } + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + "TITL" => self.title = Some(take_line_value(tokenizer)), + "REFN" => self.user_reference_number = Some(UserReferenceNumber::new(tokenizer, level + 1)), + "RIN" => self.automated_record_id = Some(take_line_value(tokenizer)), + "NOTE" => self.note_structure = Some(Note::new(tokenizer, level + 1)), + "SOUR" => self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + _ => panic!( + "{} Unhandled Multimedia Tag: {}", + dbg(tokenizer), + tag + ), + }, + _ => panic!( + "Unhandled Multimedia Token: {:?}", + tokenizer.current_token + ), + } + } + } +} + +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] + +/// A complete local or remote file reference to the auxiliary data to be linked to the GEDCOM +/// context. 
Remote reference would include a network address where the multimedia data may +/// be obtained. +pub struct MultimediaFileRefn { + pub value: Option, + pub title: Option, + pub form: Option, +} + +impl MultimediaFileRefn { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> MultimediaFileRefn { + let mut file = MultimediaFileRefn::default(); + file.parse(tokenizer, level); + file + } +} + +impl Parse for MultimediaFileRefn { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + loop { + if let Token::Level(curl_level) = &tokenizer.current_token { + if curl_level <= &level { + break; + } + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "TITL" => self.title = Some(take_line_value(tokenizer)), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + _ => panic!( + "{} Unhandled MultimediaFileRefn Tag: {}", + dbg(tokenizer), + tag + ), + }, + _ => panic!( + "Unhandled MultimediaFileRefn Token: {:?}", + tokenizer.current_token + ), + } + } + } +} + +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] + +/// Indicates the format of the multimedia data associated with the specific GEDCOM context. This +/// allows processors to determine whether they can process the data object. Any linked files should +/// contain the data required, in the indicated format, to process the file data. +/// +/// NOTE: The 5.5 spec lists the following seven formats [ bmp | gif | jpg | ole | pcx | tif | wav ]. +/// However, we're leaving this open for emerging formats, Option. 
+pub struct MultimediaFormat { + pub value: Option, + pub source_media_type: Option, +} + +impl MultimediaFormat { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> MultimediaFormat { + let mut form = MultimediaFormat::default(); + form.parse(tokenizer, level); + form + } +} + +impl Parse for MultimediaFormat { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + loop { + if let Token::Level(curl_level) = &tokenizer.current_token { + if curl_level <= &level { + break; + } + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "TYPE" => self.source_media_type = Some(take_line_value(tokenizer)), + _ => panic!( + "{} Unhandled MultimediaFormat Tag: {}", + dbg(tokenizer), + tag + ), + }, + _ => panic!( + "Unhandled MultimediaFormat Token: {:?}", + tokenizer.current_token + ), + } + } + } +} + +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] + +/// A user-defined number or text that the submitter uses to identify this record. For instance, it +/// may be a record number within the submitter's automated or manual system, or it may be a page +/// and position number on a pedigree chart. +pub struct UserReferenceNumber { + /// line value + pub value: Option, + /// A user-defined definition of the USER_REFERENCE_NUMBER. 
+ pub user_reference_type: Option, +} + +impl UserReferenceNumber { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> UserReferenceNumber { + let mut refn = UserReferenceNumber::default(); + refn.parse(tokenizer, level); + refn + } +} + +impl Parse for UserReferenceNumber { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.value = Some(take_line_value(tokenizer)); + + loop { + if let Token::Level(curl_level) = &tokenizer.current_token { + if curl_level <= &level { + break; + } + } + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "TYPE" => self.user_reference_type = Some(take_line_value(tokenizer)), + _ => panic!( + "{} Unhandled UserReferenceNumber Tag: {}", + dbg(tokenizer), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled UserReferenceNumber Token: {:?}", + tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/note.rs b/src/types/note.rs index cf085cb..7a54bb1 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -66,7 +66,8 @@ impl Parse for Note { "MIME" => self.mime = Some(take_line_value(tokenizer)), "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), "LANG" => self.language = Some(take_line_value(tokenizer)), - "CONT" | "CONC" => { + "CONC" => value.push_str(&take_line_value(tokenizer)), + "CONT" => { value.push('\n'); value.push_str(&take_line_value(tokenizer)); } diff --git a/src/types/source.rs b/src/types/source.rs index 739be0e..3a02f09 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,14 +1,14 @@ use crate::{ parser::Parse, tokenizer::{Token, Tokenizer}, - types::{Event, RepoCitation}, - util::{dbg, take_line_value, take_continued_text}, + types::{Event, RepoCitation, CustomData}, + util::{dbg, take_continued_text, take_line_value, parse_custom_tag}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::Xref; +use super::{Xref}; #[derive(Debug)] #[cfg_attr(feature = 
"json", derive(Serialize, Deserialize))] @@ -98,6 +98,7 @@ pub struct SourceCitation { pub xref: Xref, /// Page number of source pub page: Option, + pub custom_data: Vec, } impl SourceCitation { @@ -106,10 +107,15 @@ impl SourceCitation { let mut citation = SourceCitation { xref: take_line_value(tokenizer), page: None, + custom_data: Vec::new(), }; citation.parse(tokenizer, level); citation } + + pub fn add_custom_data(&mut self, data: CustomData) { + self.custom_data.push(data) + } } impl Parse for SourceCitation { @@ -125,6 +131,10 @@ impl Parse for SourceCitation { "PAGE" => self.page = Some(take_line_value(tokenizer)), _ => panic!("{} Unhandled Citation Tag: {}", dbg(tokenizer), tag), }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Citation Token: {:?}", tokenizer.current_token), } diff --git a/tests/fixtures/long-url.ged b/tests/fixtures/long-url.ged new file mode 100644 index 0000000..247368a --- /dev/null +++ b/tests/fixtures/long-url.ged @@ -0,0 +1,8 @@ +0 HEAD +1 GEDC +2 VERS 7.0 +1 SUBM @S1@ +0 @S1@ SUBM +1 NAME John Doe +1 WWW https://www.subdomain.example.com/alfa/bravo/charlie/delta/echo/foxtrot/golf/hotel/india/juliett/kilo/lima/mike/november/oscar/papa/quebec/romeo/sierra/tango/uniform/victor/whiskey/xray/yankee/zulu/Lorem%20ipsum%20dolor%20sit%20amet,%20consectetur%20adipiscing%20elit,%20sed%20do%20eiusmod%20tempor%20incididunt%20ut%20labore%20et%20dolore%20magna%20aliqua.%20Ut%20enim%20ad%20minim%20veniam,%20quis%20nostrud%20exercitation%20ullamco%20laboris%20nisi%20ut%20aliquip%20ex%20ea%20commodo%20consequat.%20Duis%20aute%20irure%20dolor%20in%20reprehenderit%20in%20voluptate%20velit%20esse%20cillum%20dolore%20eu%20fugiat%20nulla%20pariatur.%20Excepteur%20sint%20occaecat%20cupidatat%20non%20proident,%20sunt%20in%20culpa%20qui%20officia%20deserunt%20mollit%20anim%20id%20est%20laborum./filename.html +0 
TRLR diff --git a/tests/header.rs b/tests/header.rs index cbfa8bf..5c68a7a 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -282,6 +282,6 @@ mod tests { let data = parser.parse_record(); let h_note = data.header.unwrap().note.unwrap(); - assert_eq!(h_note.value.unwrap().chars().count(), 1441); + assert_eq!(h_note.value.unwrap().chars().count(), 1440); } } diff --git a/tests/lib.rs b/tests/lib.rs index 4eba85e..32b2e5d 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -17,6 +17,8 @@ mod tests { #[test] fn parses_basic_gedcom() { let simple_ged: String = read_relative("./tests/fixtures/simple.ged"); + // let simple_ged: String = read_relative("./tests/fixtures/washington.ged"); + // let simple_ged: String = read_relative("./tests/fixtures/allged.ged"); assert!(simple_ged.len() > 0); let mut parser = Parser::new(simple_ged.chars()); diff --git a/tests/multimedia.rs b/tests/multimedia.rs new file mode 100644 index 0000000..3661319 --- /dev/null +++ b/tests/multimedia.rs @@ -0,0 +1,117 @@ +#[cfg(test)] +mod tests { + use gedcom::parser::Parser; + + #[test] + fn parses_basic_multimedia_record() { + let sample = "\ + 0 HEAD\n\ + 1 CHAR UTF-8\n\ + 1 SOUR Ancestry.com Family Trees\n\ + 2 VERS (2010.3)\n\ + 2 NAME Ancestry.com Family Trees\n\ + 2 CORP Ancestry.com\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 2 FORM LINEAGE-LINKED\n\ + 0 OBJE\n\ + 1 FILE http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1\n\ + 1 FORM jpg\n\ + 1 TITL In Prague\n\ + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + assert_eq!(data.multimedia.len(), 1); + + let obje = &data.multimedia[0]; + assert_eq!(obje.title.as_ref().unwrap(), "In Prague"); + + let form = obje.form.as_ref().unwrap(); + assert_eq!(form.value.as_ref().unwrap(), "jpg"); + + let file = obje.file.as_ref().unwrap(); + assert_eq!( + file.value.as_ref().unwrap(), + 
"http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1" + ); + } + + #[test] + fn parses_spec_structure() { + let sample = "\ + 0 HEAD\n\ + 1 GEDC\n\ + 2 VERS 5.5\n\ + 2 FORM LINEAGE-LINKED\n\ + 0 @MEDIA1@ OBJE\n\ + 1 FILE /home/user/media/file_name.bmp\n\ + 2 FORM bmp\n\ + 3 TYPE photo + 2 TITL A Bitmap\n\ + 1 REFN 000\n\ + 2 TYPE User Reference Type\n\ + 1 RIN Automated Id\n\ + 1 NOTE A note\n\ + 2 CONT Note continued here. The word TE\n\ + 2 CONC ST should not be broken!\n\ + 1 SOUR @SOUR1@\n\ + 2 PAGE 42 + 2 _CUSTOM Custom data\n\ + 1 CHAN + 2 DATE 1 APR 1998 + 3 TIME 12:34:56.789 + 2 NOTE A note + 3 CONT Note continued here. The word TE + 3 CONC ST should not be broken! + 0 TRLR"; + + let mut parser = Parser::new(sample.chars()); + let data = parser.parse_record(); + assert_eq!(data.multimedia.len(), 1); + + let obje = &data.multimedia[0]; + assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); + + let file = obje.file.as_ref().unwrap(); + assert_eq!( + file.value.as_ref().unwrap(), + "/home/user/media/file_name.bmp" + ); + + assert_eq!(file.title.as_ref().unwrap(), "A Bitmap"); + + let form = file.form.as_ref().unwrap(); + assert_eq!(form.value.as_ref().unwrap(), "bmp"); + assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); + + let user_ref = obje.user_reference_number.as_ref().unwrap(); + assert_eq!(user_ref.value.as_ref().unwrap(), "000"); + assert_eq!( + user_ref.user_reference_type.as_ref().unwrap(), + "User Reference Type" + ); + + assert_eq!(obje.automated_record_id.as_ref().unwrap(), "Automated Id"); + + let note = obje.note_structure.as_ref().unwrap(); + assert_eq!( + note.value.as_ref().unwrap(), + "A note\nNote continued here. The word TEST should not be broken!" 
+ ); + + let sour = obje.source_citation.as_ref().unwrap(); + assert_eq!(sour.xref, "@SOUR1@"); + assert_eq!(sour.page.as_ref().unwrap(), "42"); + assert_eq!(sour.custom_data.len(), 1); + assert_eq!(sour.custom_data[0].value, "Custom data"); + + let chan = obje.change_date.as_ref().unwrap(); + let date = chan.date.as_ref().unwrap(); + assert_eq!(date.value.as_ref().unwrap(), "1 APR 1998"); + assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); + + let chan_note = chan.note.as_ref().unwrap(); + assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. The word TEST should not be broken!"); + } +} From 9e6491f13400566f55848cb3bac2a9482b606c11 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 20 Nov 2022 15:50:40 -0600 Subject: [PATCH 27/55] Modify comment in Multimedia struct --- src/types/multimedia.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 2c7ce0b..0c5bd6f 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -24,7 +24,7 @@ pub struct Multimedia { /// Optional reference to link to this submitter pub xref: Option, pub file: Option, - /// The 5.5 spec, page 26, shows FILE as a sub-structure of FILE, but the struct appears as a + /// The 5.5 spec, page 26, shows FORM as a sub-structure of FILE, but the struct appears as a /// sibling in an Ancestry.com export. pub form: Option, /// The 5.5 spec, page 26, shows TITL as a sub-structure of FILE, but the struct appears as a From 77c127c9e820d6d2b3796da84f18d47109c289d2 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 20 Nov 2022 15:59:42 -0600 Subject: [PATCH 28/55] Modify README --- readme.md | 1 - 1 file changed, 1 deletion(-) diff --git a/readme.md b/readme.md index b35fbbe..a007b7e 100644 --- a/readme.md +++ b/readme.md @@ -61,7 +61,6 @@ Here are some notes about parsed data & tags. 
Page references are to the [Gedcom ### Top-level tags * `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. -* `MULTIMEDIA_RECORD` - p.26 - Multimedia (`OBJE`) is not currently parsed. * `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. (except in header) Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. From 51bdfdc4816b0e2f2c9cbc3afc21996bac69e3b7 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 20 Nov 2022 16:01:04 -0600 Subject: [PATCH 29/55] Modify README --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 21ae083..152d1e2 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ Here are some notes about parsed data & tags. Page references are to the [Gedcom ### Top-level tags * `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. -* `MULTIMEDIA_RECORD` - p.26 - Multimedia (`OBJE`) is not currently parsed. * `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. (except in header) Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. 
From 50610628d054df2617e2f196a93ef64cc9c15021 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 21 Nov 2022 22:38:11 -0600 Subject: [PATCH 30/55] Handle additional tags in submitter --- README.md | 1 - src/bin.rs | 8 +- src/lib.rs | 189 +++++++++++++++++++++++++++++++++++++-- src/parser.rs | 25 +----- src/tree.rs | 146 ------------------------------ src/types/address.rs | 4 +- src/types/copyright.rs | 4 +- src/types/corporation.rs | 4 +- src/types/custom.rs | 2 +- src/types/date.rs | 6 +- src/types/event.rs | 4 +- src/types/family.rs | 4 +- src/types/header.rs | 22 ++--- src/types/individual.rs | 22 ++--- src/types/multimedia.rs | 99 ++++++++++++++------ src/types/note.rs | 4 +- src/types/repository.rs | 6 +- src/types/source.rs | 12 +-- src/types/submitter.rs | 62 ++++++++++--- src/types/translation.rs | 4 +- src/util.rs | 6 +- tests/header.rs | 26 +++--- tests/lib.rs | 45 +++++++++- tests/multimedia.rs | 8 +- 24 files changed, 422 insertions(+), 291 deletions(-) delete mode 100644 src/tree.rs diff --git a/README.md b/README.md index 152d1e2..c82a41d 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ Here are some notes about parsed data & tags. Page references are to the [Gedcom ### Top-level tags * `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. -* `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. (except in header) Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. 
diff --git a/src/bin.rs b/src/bin.rs index f9c59a6..737deaf 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,9 +1,11 @@ -use gedcom::parser::Parser; -use gedcom::GedcomData; +// use ged::{GedcomRecord, GedcomData}; + use std::env; use std::fs; use std::path::PathBuf; +use gedcom::{GedcomData, GedcomRecord}; + fn main() { let args: Vec = env::args().collect(); match args.len() { @@ -21,7 +23,7 @@ fn main() { let data: GedcomData; if let Ok(contents) = read_relative(filename) { - let mut parser = Parser::new(contents.chars()); + let mut parser = GedcomRecord::new(contents.chars()); data = parser.parse_record(); println!("Parsing complete!"); diff --git a/src/lib.rs b/src/lib.rs index 7adf1e8..9cf3806 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,12 @@ /*! A parser for GEDCOM files ```rust -use gedcom::parser::Parser; - // the parser takes the gedcom file contents as a chars iterator +use gedcom::GedcomRecord; let gedcom_source = std::fs::read_to_string("./tests/fixtures/sample.ged").unwrap(); -let mut parser = Parser::new(gedcom_source.chars()); -let gedcom_data = parser.parse_record(); +let mut record = GedcomRecord::new(gedcom_source.chars()); +let gedcom_data = record.parse_record(); // output some stats on the gedcom contents gedcom_data.stats(); @@ -21,17 +20,191 @@ This crate contains an optional `"json"` feature that implements serialization & #[macro_use] mod util; +use util::{dbg, parse_custom_tag}; -pub mod parser; pub mod tokenizer; +use tokenizer::{Token, Tokenizer}; + pub mod types; +use types::{ + UserDefinedData, Family, Header, Individual, MultimediaRecord, Repository, Source, Submitter, +}; + +mod parser; +pub use parser::Parser; + +use std::str::Chars; + +/// The Gedcom parser that converts the token list into a data structure +pub struct GedcomRecord<'a> { + tokenizer: Tokenizer<'a>, +} + +impl<'a> GedcomRecord<'a> { + /// Creates a parser state machine for parsing a gedcom file as a chars iterator + #[must_use] + pub fn new(chars: Chars<'a>) 
-> GedcomRecord {
+        let mut tokenizer = Tokenizer::new(chars);
+        tokenizer.next_token();
+        GedcomRecord { tokenizer }
+    }
+
+    /// Does the actual parsing of the record.
+    pub fn parse_record(&mut self) -> GedcomData {
+        GedcomData::new(&mut self.tokenizer, 0)
+    }
+}
+
+#[cfg(feature = "json")]
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Default)]
+#[cfg_attr(feature = "json", derive(Serialize, Deserialize))]
+/// The data structure representing all the data within a gedcom file
+pub struct GedcomData {
+    /// Header containing file metadata
+    pub header: Option<Header>,
+    /// List of submitters of the facts
+    pub submitters: Vec<Submitter>,
+    /// Individuals within the family tree
+    pub individuals: Vec<Individual>,
+    /// The family units of the tree, representing relationships between individuals
+    pub families: Vec<Family>,
+    /// A data repository where `sources` are held
+    pub repositories: Vec<Repository>,
+    /// Sources of facts. _ie._ book, document, census, etc.
+    pub sources: Vec<Source>,
+    /// A multimedia asset linked to a fact
+    pub multimedia: Vec<MultimediaRecord>,
+    /// Applications requiring the use of nonstandard tags should define them with a leading underscore
+    /// so that they will not conflict with future GEDCOM standard tags. Systems that read
+    /// user-defined tags must consider that they have meaning only with respect to a system
+    /// contained in the HEAD.SOUR context.
+    pub custom_data: Vec<UserDefinedData>,
+}
+
+// should maybe store these by xref if available?
+impl GedcomData {
+    /// constructor for GedcomData
+    #[must_use]
+    pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomData {
+        let mut data = GedcomData::default();
+        data.parse(tokenizer, level);
+        data
+    }
 
-mod tree;
-pub use tree::GedcomData;
+    /// Adds a `Family` (a relationship between individuals) to the tree
+    pub fn add_family(&mut self, family: Family) {
+        self.families.push(family);
+    }
+
+    /// Adds an `Individual` to the tree
+    pub fn add_individual(&mut self, individual: Individual) {
+        self.individuals.push(individual);
+    }
+
+    /// Adds a data `Repository` to the tree
+    pub fn add_repository(&mut self, repo: Repository) {
+        self.repositories.push(repo);
+    }
+
+    /// Adds a `Source` to the tree
+    pub fn add_source(&mut self, source: Source) {
+        self.sources.push(source);
+    }
+
+    /// Adds a `Submitter` to the tree
+    pub fn add_submitter(&mut self, submitter: Submitter) {
+        self.submitters.push(submitter);
+    }
+
+    /// Adds a `Multimedia` to the tree
+    pub fn add_multimedia(&mut self, multimedia: MultimediaRecord) {
+        self.multimedia.push(multimedia);
+    }
+
+    /// Adds a `UserDefinedData` to the tree
+    pub fn 
add_custom_data(&mut self, data: UserDefinedData) { + self.custom_data.push(data) + } + + /// Outputs a summary of data contained in the tree to stdout + pub fn stats(&self) { + println!("----------------------"); + println!("| Gedcom Data Stats: |"); + println!("----------------------"); + println!(" submitters: {}", self.submitters.len()); + println!(" individuals: {}", self.individuals.len()); + println!(" families: {}", self.families.len()); + println!(" repositories: {}", self.repositories.len()); + println!(" sources: {}", self.sources.len()); + println!(" multimedia: {}", self.multimedia.len()); + println!("----------------------"); + } +} + +impl Parser for GedcomData { + /// Does the actual parsing of the record. + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + loop { + // TODO is this necessary? + let current_level = match tokenizer.current_token { + Token::Level(n) => n, + _ => panic!( + "{} Expected Level, found {:?}", + dbg(tokenizer), + tokenizer.current_token + ), + }; + + tokenizer.next_token(); + + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + + if let Token::Tag(tag) = &tokenizer.current_token { + match tag.as_str() { + "HEAD" => self.header = Some(Header::new(tokenizer, level)), + "FAM" => self.add_family(Family::new(tokenizer, level, pointer)), + "INDI" => { + self.add_individual(Individual::new(tokenizer, current_level, pointer)) + } + "REPO" => { + self.add_repository(Repository::new(tokenizer, current_level, pointer)) + } + "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)), + "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)), + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level, pointer)), + "TRLR" => break, + _ => { + println!("{} Unhandled tag {}", dbg(tokenizer), tag); + tokenizer.next_token(); + } + }; + } else if let Token::CustomTag(tag) = 
&tokenizer.current_token { + let tag_clone = tag.clone(); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); + while tokenizer.current_token != Token::Level(level) { + tokenizer.next_token(); + } + } else { + println!( + "{} Unhandled token {:?}", + dbg(tokenizer), + tokenizer.current_token + ); + tokenizer.next_token(); + }; + } + } +} #[must_use] /// Helper function for converting GEDCOM file content stream to parsed data. pub fn parse(content: std::str::Chars) -> GedcomData { - let mut p = parser::Parser::new(content); + let mut p = GedcomRecord::new(content); p.parse_record() } diff --git a/src/parser.rs b/src/parser.rs index f5ea585..ad73e84 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,30 +1,7 @@ -//! The state machine that parses a char iterator of the gedcom's contents -use std::str::Chars; use crate::tokenizer::Tokenizer; -use crate::tree::GedcomData; /// Parse converts a subset of a token list into a type's data structure. -pub trait Parse { +pub trait Parser { /// parse does the actual parsing of a subset of a token list fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8); } - -/// The Gedcom parser that converts the token list into a data structure -pub struct Parser<'a> { - tokenizer: Tokenizer<'a>, -} - -impl<'a> Parser<'a> { - /// Creates a parser state machine for parsing a gedcom file as a chars iterator - #[must_use] - pub fn new(chars: Chars<'a>) -> Parser { - let mut tokenizer = Tokenizer::new(chars); - tokenizer.next_token(); - Parser { tokenizer } - } - - /// Does the actual parsing of the record. 
- pub fn parse_record(&mut self) -> GedcomData { - GedcomData::new(&mut self.tokenizer, 0) - } -} diff --git a/src/tree.rs b/src/tree.rs deleted file mode 100644 index ff63dbc..0000000 --- a/src/tree.rs +++ /dev/null @@ -1,146 +0,0 @@ -use crate::{ - parser::Parse, - tokenizer::{Tokenizer, Token}, - types::{Family, Header, Individual, Multimedia, Repository, Source, Submitter}, - util::{dbg, parse_custom_tag}, -}; -#[cfg(feature = "json")] -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Default)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// The data structure representing all the data within a gedcom file -pub struct GedcomData { - /// Header containing file metadata - pub header: Option
, - /// List of submitters of the facts - pub submitters: Vec, - /// Individuals within the family tree - pub individuals: Vec, - /// The family units of the tree, representing relationships between individuals - pub families: Vec, - /// A data repository where `sources` are held - pub repositories: Vec, - /// Sources of facts. _ie._ book, document, census, etc. - pub sources: Vec, - /// A multimedia asset linked to a fact - pub multimedia: Vec, -} - -// should maybe store these by xref if available? -impl GedcomData { - /// contructor for GedcomData - #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomData { - let mut data = GedcomData::default(); - data.parse(tokenizer, level); - data - } - - /// Adds a `Family` (a relationship between individuals) to the tree - pub fn add_family(&mut self, family: Family) { - self.families.push(family); - } - - /// Adds an `Individual` to the tree - pub fn add_individual(&mut self, individual: Individual) { - self.individuals.push(individual); - } - - /// Adds a data `Repository` to the tree - pub fn add_repository(&mut self, repo: Repository) { - self.repositories.push(repo); - } - - /// Adds a `Source` to the tree - pub fn add_source(&mut self, source: Source) { - self.sources.push(source); - } - - /// Adds a `Submitter` to the tree - pub fn add_submitter(&mut self, submitter: Submitter) { - self.submitters.push(submitter); - } - - /// Adds a `Multimedia` to the tree - pub fn add_multimedia(&mut self, multimedia: Multimedia) { - self.multimedia.push(multimedia); - } - - /// Outputs a summary of data contained in the tree to stdout - pub fn stats(&self) { - println!("----------------------"); - println!("| Gedcom Data Stats: |"); - println!("----------------------"); - println!(" submitters: {}", self.submitters.len()); - println!(" individuals: {}", self.individuals.len()); - println!(" families: {}", self.families.len()); - println!(" repositories: {}", self.repositories.len()); - println!(" sources: {}", 
self.sources.len()); - println!(" multimedia: {}", self.multimedia.len()); - println!("----------------------"); - } -} - -impl Parse for GedcomData { - /// Does the actual parsing of the record. - fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - loop { - // TODO is this necessary? - let current_level = match tokenizer.current_token { - Token::Level(n) => n, - _ => panic!( - "{} Expected Level, found {:?}", - dbg(tokenizer), - tokenizer.current_token - ), - }; - - tokenizer.next_token(); - - let mut pointer: Option = None; - if let Token::Pointer(xref) = &tokenizer.current_token { - pointer = Some(xref.to_string()); - tokenizer.next_token(); - } - - if let Token::Tag(tag) = &tokenizer.current_token { - match tag.as_str() { - "HEAD" => self.header = Some(Header::new(tokenizer, level)), - "FAM" => self.add_family(Family::new(tokenizer, level, pointer)), - "INDI" => { - self.add_individual(Individual::new(tokenizer, current_level, pointer)) - } - "REPO" => self.add_repository(Repository::new(tokenizer, current_level, pointer)), - "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)), - "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)), - "OBJE" => self.add_multimedia(Multimedia::new(tokenizer, level, pointer)), - "TRLR" => break, - _ => { - println!("{} Unhandled tag {}", dbg(tokenizer), tag); - tokenizer.next_token(); - } - }; - } else if let Token::CustomTag(tag) = &tokenizer.current_token { - // TODO - let tag_clone = tag.clone(); - let custom_data = parse_custom_tag(tokenizer, tag_clone); - println!( - "{} Skipping top-level custom tag: {:?}", - dbg(tokenizer), - custom_data - ); - while tokenizer.current_token != Token::Level(level) { - tokenizer.next_token(); - } - } else { - println!( - "{} Unhandled token {:?}", - dbg(tokenizer), - tokenizer.current_token - ); - tokenizer.next_token(); - }; - } - } -} diff --git a/src/types/address.rs b/src/types/address.rs index c78a9c8..e497435 100644 --- 
a/src/types/address.rs +++ b/src/types/address.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use std::fmt; use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, util::{dbg, take_line_value}, }; @@ -31,7 +31,7 @@ impl Address { } } -impl Parse for Address { +impl Parser for Address { /// parse handles ADDR tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip ADDR tag diff --git a/src/types/copyright.rs b/src/types/copyright.rs index c36ee25..ed236dd 100644 --- a/src/types/copyright.rs +++ b/src/types/copyright.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, util::{dbg, take_line_value}, }; @@ -26,7 +26,7 @@ impl Copyright { } } -impl Parse for Copyright { +impl Parser for Copyright { /// parse the COPR tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); diff --git a/src/types/corporation.rs b/src/types/corporation.rs index b7b005b..5e0dd9a 100644 --- a/src/types/corporation.rs +++ b/src/types/corporation.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, types::Address, util::{dbg, take_line_value}, @@ -35,7 +35,7 @@ impl Corporation { } } -impl Parse for Corporation { +impl Parser for Corporation { /// parse is for a CORP tag within the SOUR tag of a HEADER fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); diff --git a/src/types/custom.rs b/src/types/custom.rs index d640dfd..e602117 100644 --- a/src/types/custom.rs +++ b/src/types/custom.rs @@ -1,6 +1,6 @@ #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct CustomData { +pub struct UserDefinedData { pub tag: String, pub value: String, } diff --git a/src/types/date.rs b/src/types/date.rs index 5b56e5f..a712505 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -1,5 
+1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, util::{dbg, take_line_value}, }; @@ -40,7 +40,7 @@ impl Date { } } -impl Parse for Date { +impl Parser for Date { /// parse handles the DATE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); @@ -83,7 +83,7 @@ impl ChangeDate { } } -impl Parse for ChangeDate { +impl Parser for ChangeDate { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); diff --git a/src/types/event.rs b/src/types/event.rs index 88645b0..ccd776b 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, types::SourceCitation, util::{dbg, take_line_value}, @@ -119,7 +119,7 @@ pub trait HasEvents { } } -impl Parse for Event { +impl Parser for Event { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); diff --git a/src/types/family.rs b/src/types/family.rs index 1b5881e..9620bc7 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, types::{event::HasEvents, Event}, util::{dbg, take_line_value}, @@ -55,7 +55,7 @@ impl Family { } } -impl Parse for Family { +impl Parser for Family { /// parse handles FAM top-level tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip over FAM tag name diff --git a/src/types/header.rs b/src/types/header.rs index 8a9bd27..452abd1 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,6 +1,6 @@ use crate::util::dbg; use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, types::{Copyright, Corporation, Date, Note}, util::{parse_custom_tag, take_line_value}, @@ -8,7 +8,7 @@ use crate::{ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::CustomData; +use super::UserDefinedData; /// Header (tag: HEAD) 
containing GEDCOM metadata. /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER @@ -44,7 +44,7 @@ pub struct Header { pub note: Option, /// tag: PLAC pub place: Option, - pub custom_data: Vec, + pub custom_data: Vec, } impl Header { @@ -55,12 +55,12 @@ impl Header { header } - pub fn add_custom_data(&mut self, data: CustomData) { + pub fn add_custom_data(&mut self, data: UserDefinedData) { self.custom_data.push(data) } } -impl Parse for Header { +impl Parser for Header { /// Parses HEAD top-level tag. See /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { @@ -89,7 +89,7 @@ impl Parse for Header { Token::CustomTag(tag) => { let tag_clone = tag.clone(); self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) - } + }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Header Token: {:?}", &tokenizer.current_token), } @@ -118,7 +118,7 @@ impl GedcomDoc { } } -impl Parse for GedcomDoc { +impl Parser for GedcomDoc { /// parse handles parsing GEDC tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip GEDC tag @@ -176,7 +176,7 @@ impl Encoding { } } -impl Parse for Encoding { +impl Parser for Encoding { /// parse handles the parsing of the CHARS tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); @@ -230,7 +230,7 @@ impl HeadSour { } } -impl Parse for HeadSour { +impl Parser for HeadSour { /// parse handles the SOUR tag in a header fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); @@ -278,7 +278,7 @@ impl HeadSourData { } } -impl Parse for HeadSourData { +impl Parser for HeadSourData { /// parse parses the DATA tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); @@ -342,7 +342,7 @@ impl HeadPlac { } } -impl Parse for HeadPlac { +impl Parser for HeadPlac { /// parse 
handles the PLAC tag when present in header fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { diff --git a/src/types/individual.rs b/src/types/individual.rs index 1acb25a..91c9091 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,7 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, CustomData, Event, Multimedia, SourceCitation}, + types::{event::HasEvents, UserDefinedData, Event, MultimediaRecord, SourceCitation}, util::{dbg, parse_custom_tag, take_line_value}, }; @@ -18,10 +18,10 @@ pub struct Individual { pub name: Option, pub sex: Gender, pub families: Vec, - pub custom_data: Vec, + pub custom_data: Vec, pub last_updated: Option, pub source: Vec, - pub multimedia: Vec, + pub multimedia: Vec, events: Vec, } @@ -56,7 +56,7 @@ impl Individual { } } - pub fn add_custom_data(&mut self, data: CustomData) { + pub fn add_custom_data(&mut self, data: UserDefinedData) { self.custom_data.push(data) } @@ -64,7 +64,7 @@ impl Individual { self.source.push(sour); } - pub fn add_multimedia(&mut self, multimedia: Multimedia) { + pub fn add_multimedia(&mut self, multimedia: MultimediaRecord) { self.multimedia.push(multimedia); } } @@ -78,7 +78,7 @@ impl HasEvents for Individual { } } -impl Parse for Individual { +impl Parser for Individual { /// parse handles the INDI top-level tag fn parse(&mut self, tokenizer: &mut crate::tokenizer::Tokenizer, level: u8) { // skip over INDI tag name @@ -110,7 +110,7 @@ impl Parse for Individual { self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); } // TODO handle xref - "OBJE" => self.add_multimedia(Multimedia::new(tokenizer, level + 1, None)), + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)), _ => panic!("{} Unhandled Individual Tag: {}", dbg(tokenizer), tag), }, Token::CustomTag(tag) => { @@ -142,7 +142,7 @@ impl Gender { } } -impl Parse for Gender { +impl Parser for Gender { fn parse(&mut self, 
tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); if let Token::LineValue(gender_string) = &tokenizer.current_token { @@ -213,7 +213,7 @@ impl FamilyLink { } } -impl Parse for FamilyLink { +impl Parser for FamilyLink { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -265,7 +265,7 @@ impl Name { } } -impl Parse for Name { +impl Parser for Name { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 0c5bd6f..db0005c 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -1,7 +1,7 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, - types::{SourceCitation, Note, Xref}, + types::{Note, SourceCitation, Xref}, util::{dbg, take_line_value}, }; @@ -20,7 +20,7 @@ use super::ChangeDate; /// The change and creation dates should be for the OBJE record itself, not the underlying files. /// /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#MULTIMEDIA_RECORD. 
-pub struct Multimedia { +pub struct MultimediaRecord { /// Optional reference to link to this submitter pub xref: Option, pub file: Option, @@ -37,10 +37,10 @@ pub struct Multimedia { pub note_structure: Option, } -impl Multimedia { +impl MultimediaRecord { #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Multimedia { - let mut obje = Multimedia{ + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> MultimediaRecord { + let mut obje = MultimediaRecord { xref, file: None, form: None, @@ -56,7 +56,7 @@ impl Multimedia { } } -impl Parse for Multimedia { +impl Parser for MultimediaRecord { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip current line tokenizer.next_token(); @@ -72,21 +72,72 @@ impl Parse for Multimedia { "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), "TITL" => self.title = Some(take_line_value(tokenizer)), - "REFN" => self.user_reference_number = Some(UserReferenceNumber::new(tokenizer, level + 1)), + "REFN" => { + self.user_reference_number = + Some(UserReferenceNumber::new(tokenizer, level + 1)) + } "RIN" => self.automated_record_id = Some(take_line_value(tokenizer)), "NOTE" => self.note_structure = Some(Note::new(tokenizer, level + 1)), - "SOUR" => self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)), + "SOUR" => { + self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)) + } "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - _ => panic!( - "{} Unhandled Multimedia Tag: {}", - dbg(tokenizer), - tag - ), + _ => panic!("{} Unhandled Multimedia Tag: {}", dbg(tokenizer), tag), }, - _ => panic!( - "Unhandled Multimedia Token: {:?}", - tokenizer.current_token - ), + _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), + } + } + } +} + +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, 
Deserialize))] +/// MultimediaLink +pub struct MultimediaLink { + /// Optional reference to link to this submitter + pub xref: Option, + pub file: Option, + /// The 5.5 spec, page 26, shows FORM as a sub-structure of FILE, but the struct appears as a + /// sibling in an Ancestry.com export. + pub form: Option, + /// The 5.5 spec, page 26, shows TITL as a sub-structure of FILE, but the struct appears as a + /// sibling in an Ancestry.com export. + pub title: Option, +} + +impl MultimediaLink { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> MultimediaLink { + let mut obje = MultimediaLink { + xref, + file: None, + form: None, + title: None, + }; + obje.parse(tokenizer, level); + obje + } +} + +impl Parser for MultimediaLink { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip current line + tokenizer.next_token(); + loop { + if let Token::Level(curl_level) = tokenizer.current_token { + if curl_level <= level { + break; + } + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + "TITL" => self.title = Some(take_line_value(tokenizer)), + _ => panic!("{} Unhandled Multimedia Tag: {}", dbg(tokenizer), tag), + }, + _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), } } } @@ -113,7 +164,7 @@ impl MultimediaFileRefn { } } -impl Parse for MultimediaFileRefn { +impl Parser for MultimediaFileRefn { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); loop { @@ -165,7 +216,7 @@ impl MultimediaFormat { } } -impl Parse for MultimediaFormat { +impl Parser for MultimediaFormat { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); loop { @@ -178,11 +229,7 @@ impl Parse for MultimediaFormat { match 
&tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "TYPE" => self.source_media_type = Some(take_line_value(tokenizer)), - _ => panic!( - "{} Unhandled MultimediaFormat Tag: {}", - dbg(tokenizer), - tag - ), + _ => panic!("{} Unhandled MultimediaFormat Tag: {}", dbg(tokenizer), tag), }, _ => panic!( "Unhandled MultimediaFormat Token: {:?}", @@ -215,7 +262,7 @@ impl UserReferenceNumber { } } -impl Parse for UserReferenceNumber { +impl Parser for UserReferenceNumber { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(take_line_value(tokenizer)); diff --git a/src/types/note.rs b/src/types/note.rs index 7a54bb1..4228ea7 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, types::{Source, Translation}, util::dbg, @@ -47,7 +47,7 @@ impl Note { } } -impl Parse for Note { +impl Parser for Note { /// parse handles the NOTE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { let mut value = String::new(); diff --git a/src/types/repository.rs b/src/types/repository.rs index ad3b8d5..0712f88 100644 --- a/src/types/repository.rs +++ b/src/types/repository.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, util::{dbg, take_line_value}, }; @@ -31,7 +31,7 @@ impl Repository { } } -impl Parse for Repository { +impl Parser for Repository { /// Parses REPO top-level tag. 
fn parse(&mut self, tokenizer: &mut crate::tokenizer::Tokenizer, level: u8) { // skip REPO tag @@ -77,7 +77,7 @@ impl RepoCitation { } } -impl Parse for RepoCitation { +impl Parser for RepoCitation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { loop { if let Token::Level(cur_level) = tokenizer.current_token { diff --git a/src/types/source.rs b/src/types/source.rs index 3a02f09..407fd5e 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,7 +1,7 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, - types::{Event, RepoCitation, CustomData}, + types::{Event, RepoCitation, UserDefinedData}, util::{dbg, take_continued_text, take_line_value, parse_custom_tag}, }; @@ -43,7 +43,7 @@ impl Source { } } -impl Parse for Source { +impl Parser for Source { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip SOUR tag tokenizer.next_token(); @@ -98,7 +98,7 @@ pub struct SourceCitation { pub xref: Xref, /// Page number of source pub page: Option, - pub custom_data: Vec, + pub custom_data: Vec, } impl SourceCitation { @@ -113,12 +113,12 @@ impl SourceCitation { citation } - pub fn add_custom_data(&mut self, data: CustomData) { + pub fn add_custom_data(&mut self, data: UserDefinedData) { self.custom_data.push(data) } } -impl Parse for SourceCitation { +impl Parser for SourceCitation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { loop { if let Token::Level(cur_level) = tokenizer.current_token { diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 35f08d6..41c655a 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,8 +1,8 @@ use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, - types::Address, - util::{dbg, take_line_value}, + types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note}, + util::{dbg, parse_custom_tag, take_line_value}, }; #[cfg(feature = "json")] @@ -10,7 +10,10 @@ use serde::{Deserialize, Serialize}; type Xref = 
String; -/// Submitter of the data, ie. who reported the genealogy fact +/// The submitter record identifies an individual or organization that contributed information +/// contained in the GEDCOM transmission. All records in the transmission are assumed to be +/// submitted by the SUBMITTER referenced in the HEADer, unless a SUBMitter reference inside a +/// specific record points at a different SUBMITTER record. #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Submitter { @@ -20,10 +23,24 @@ pub struct Submitter { pub name: Option, /// Physical address of the submitter pub address: Option
, + /// A multimedia asset linked to a fact + pub multimedia: Vec, + /// Language preference + pub language: Option, + /// A registered number of a submitter of Ancestral File data. This number is used in + /// subsequent submissions or inquiries by the submitter for identification purposes. + pub registered_refn: Option, + /// A unique record identification number assigned to the record by the source system. This + /// number is intended to serve as a more sure means of identification of a record for + /// reconciling differences in data between two interfacing systems. + pub automated_record_id: Option, + /// Date of the last change to the record + pub change_date: Option, + /// Note provided by submitter about the enclosing data + pub note: Option, /// Phone number of the submitter pub phone: Option, - /// TODO - pub language: Option, + pub custom_data: Vec, } impl Submitter { @@ -35,27 +52,50 @@ impl Submitter { subm.parse(tokenizer, level); subm } + + /// Adds a `Multimedia` to the tree + pub fn add_multimedia(&mut self, multimedia: MultimediaLink) { + self.multimedia.push(multimedia); + } + + + /// + pub fn add_custom_data(&mut self, data: UserDefinedData) { + self.custom_data.push(data) + } } -impl Parse for Submitter { +impl Parser for Submitter { /// Parse handles SUBM top-level tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - // skip over SUBM tag name tokenizer.next_token(); while tokenizer.current_token != Token::Level(level) { + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "NAME" => self.name = Some(take_line_value(tokenizer)), "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), - "PHON" => self.phone = Some(take_line_value(tokenizer)), + "OBJE" => { + self.add_multimedia(MultimediaLink::new(tokenizer, level + 1, pointer)) + } "LANG" => 
self.language = Some(take_line_value(tokenizer)), - // TODO - // "CHAN" => submitter.change_date = Some(take_line_value(&mut self.tokenizer)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "PHON" => self.phone = Some(take_line_value(tokenizer)), _ => panic!("{} Unhandled Submitter Tag: {}", dbg(tokenizer), tag), }, Token::Level(_) => tokenizer.next_token(), + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); + } _ => panic!("Unhandled Submitter Token: {:?}", tokenizer.current_token), } } diff --git a/src/types/translation.rs b/src/types/translation.rs index f6fc188..84995cf 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use crate::{ - parser::Parse, + parser::Parser, tokenizer::{Token, Tokenizer}, util::dbg, util::take_line_value, @@ -31,7 +31,7 @@ impl Translation { } } -impl Parse for Translation { +impl Parser for Translation { ///parse handles the TRAN tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { diff --git a/src/util.rs b/src/util.rs index b3308b0..0e6a2d4 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,6 +1,6 @@ use crate::{ tokenizer::{Token, Tokenizer}, - types::CustomData, + types::UserDefinedData, }; /// Macro for displaying `Option`s in debug mode without the text wrapping. 
@@ -38,9 +38,9 @@ pub fn take_line_value(tokenizer: &mut Tokenizer) -> String { value } -pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> CustomData { +pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> UserDefinedData { let value = take_line_value(tokenizer); - CustomData { tag, value } + UserDefinedData { tag, value } } /// Takes the value of the current line including handling diff --git a/tests/header.rs b/tests/header.rs index 5c68a7a..3664ac0 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use gedcom::parser::Parser; + use gedcom::GedcomRecord; #[test] fn parse_head_gedc() { @@ -11,7 +11,7 @@ mod tests { 2 FORM LINEAGE-LINKED\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let head_gedc = data.header.unwrap().gedcom.unwrap(); @@ -45,7 +45,7 @@ mod tests { 3 COPR Copyright of source data\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let sour = data.header.unwrap().source.unwrap(); @@ -94,7 +94,7 @@ mod tests { 1 DEST Destination of transmission\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); assert_eq!( @@ -113,7 +113,7 @@ mod tests { 2 TIME 13:57:24.80\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_date = data.header.unwrap().date.unwrap(); @@ -132,7 +132,7 @@ mod tests { 1 FILE ALLGED.GED\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_subm = data.header.unwrap().submitter_tag.unwrap(); @@ -150,7 +150,7 @@ mod tests { 1 FILE ALLGED.GED\n\ 0 TRLR"; - let mut parser = 
Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_subn = data.header.unwrap().submission_tag.unwrap(); @@ -168,7 +168,7 @@ mod tests { 1 FILE ALLGED.GED\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_file = data.header.unwrap().filename.unwrap(); @@ -185,7 +185,7 @@ mod tests { 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_copr = data.header.unwrap().copyright.unwrap(); @@ -206,7 +206,7 @@ mod tests { 2 VERS Version number of ASCII (whatever it means)\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_char = data.header.unwrap().encoding.unwrap(); @@ -226,7 +226,7 @@ mod tests { 1 LANG language 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_lang = data.header.unwrap().language.unwrap(); @@ -243,7 +243,7 @@ mod tests { 2 FORM City, County, State, Country\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_plac = data.header.unwrap().place.unwrap(); @@ -278,7 +278,7 @@ mod tests { 2 CONC ST should not be broken!\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); let h_note = data.header.unwrap().note.unwrap(); diff --git a/tests/lib.rs b/tests/lib.rs index 32b2e5d..056558d 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -11,17 +11,16 @@ pub mod util { #[cfg(test)] mod tests { use 
super::util::read_relative; - use gedcom::parser::Parser; + use gedcom::GedcomRecord; use gedcom::types::event::HasEvents; #[test] fn parses_basic_gedcom() { let simple_ged: String = read_relative("./tests/fixtures/simple.ged"); - // let simple_ged: String = read_relative("./tests/fixtures/washington.ged"); // let simple_ged: String = read_relative("./tests/fixtures/allged.ged"); assert!(simple_ged.len() > 0); - let mut parser = Parser::new(simple_ged.chars()); + let mut parser = GedcomRecord::new(simple_ged.chars()); let data = parser.parse_record(); assert_eq!(data.individuals.len(), 3); assert_eq!(data.families.len(), 1); @@ -67,4 +66,44 @@ mod tests { assert_eq!(events[0].event.to_string(), "Marriage"); assert_eq!(events[0].date.as_ref().unwrap(), "1 APR 1950"); } + + #[test] + fn parses_basic_washington_doc() { + let simple_ged: String = read_relative("./tests/fixtures/washington.ged"); + assert!(simple_ged.len() > 0); + + let mut parser = GedcomRecord::new(simple_ged.chars()); + let data = parser.parse_record(); + assert_eq!(data.individuals.len(), 538); + assert_eq!(data.families.len(), 278); + // assert_eq!(data.submitters.len(), 0); + + let header = data.header.unwrap(); + + // header + assert_eq!( + header.encoding.unwrap().value.unwrap().as_str(), + "UTF-8" + ); + // assert_eq!(header.submitter_tag.unwrap().as_str(), "@SUBMITTER@"); + assert_eq!(header.gedcom.unwrap().version.unwrap(), "5.5.1"); + + // names + assert_eq!( + data.individuals[0] + .name + .as_ref() + .unwrap() + .value + .as_ref() + .unwrap(), + "George /Washington/" + ); + + // events + let events = data.families[0].events(); + assert_eq!(events.len(), 1); + assert_eq!(events[0].event.to_string(), "Marriage"); + assert_eq!(events[0].date.as_ref().unwrap(), "6 MAR 1730"); + } } diff --git a/tests/multimedia.rs b/tests/multimedia.rs index 3661319..abe9eea 100644 --- a/tests/multimedia.rs +++ b/tests/multimedia.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use gedcom::parser::Parser; + use 
gedcom::GedcomRecord; #[test] fn parses_basic_multimedia_record() { @@ -20,8 +20,8 @@ mod tests { 1 TITL In Prague\n\ 0 TRLR"; - let mut parser = Parser::new(sample.chars()); - let data = parser.parse_record(); + let mut record = GedcomRecord::new(sample.chars()); + let data = record.parse_record(); assert_eq!(data.multimedia.len(), 1); let obje = &data.multimedia[0]; @@ -66,7 +66,7 @@ mod tests { 3 CONC ST should not be broken! 0 TRLR"; - let mut parser = Parser::new(sample.chars()); + let mut parser = GedcomRecord::new(sample.chars()); let data = parser.parse_record(); assert_eq!(data.multimedia.len(), 1); From 1d6cf1c5d41020a9a96e529aaa67a4f72af1d038 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Tue, 22 Nov 2022 22:19:10 -0600 Subject: [PATCH 31/55] Make some util helper functions methods of Tokenizer --- readme.md | 3 +- src/lib.rs | 9 +++--- src/tokenizer.rs | 64 ++++++++++++++++++++++++++++++++++++ src/types/address.rs | 19 ++++++----- src/types/copyright.rs | 9 +++--- src/types/corporation.rs | 13 ++++---- src/types/date.rs | 9 +++--- src/types/event.rs | 7 ++-- src/types/family.rs | 9 +++--- src/types/header.rs | 48 +++++++++++++-------------- src/types/individual.rs | 29 ++++++++--------- src/types/mod.rs | 3 ++ src/types/multimedia.rs | 29 ++++++++--------- src/types/note.rs | 14 ++++---- src/types/place.rs | 1 - src/types/repository.rs | 11 +++---- src/types/source.rs | 19 ++++++----- src/types/submitter.rs | 11 +++---- src/types/translation.rs | 10 +++--- src/util.rs | 70 ---------------------------------------- 20 files changed, 182 insertions(+), 205 deletions(-) diff --git a/readme.md b/readme.md index a007b7e..da06c1e 100644 --- a/readme.md +++ b/readme.md @@ -11,7 +11,7 @@ ## About this project -GEDCOM is a file format for sharing genealogical information like family trees! It's being made obsolete by [GEDCOM-X](https://github.com/FamilySearch/gedcomx) but is still widely used in many genealogy programs. 
+GEDCOM is a file format for sharing genealogical information like family trees, and it's widely used in many genealogy programs. I wanted experience playing with parsers and representing tree structures in Rust, and noticed a parser for Rust did not exist. And thus, this project was born! A fun experiment to practice my Rust abilities. @@ -65,7 +65,6 @@ Here are some notes about parsed data & tags. Page references are to the [Gedcom Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. - ## Notes to self * Consider creating some Traits to handle change dates, notes, source citations, and other recurring fields. diff --git a/src/lib.rs b/src/lib.rs index 9cf3806..f386e3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,6 @@ This crate contains an optional `"json"` feature that implements serialization & #[macro_use] mod util; -use util::{dbg, parse_custom_tag}; pub mod tokenizer; use tokenizer::{Token, Tokenizer}; @@ -152,7 +151,7 @@ impl Parser for GedcomData { Token::Level(n) => n, _ => panic!( "{} Expected Level, found {:?}", - dbg(tokenizer), + tokenizer.debug(), tokenizer.current_token ), }; @@ -180,20 +179,20 @@ impl Parser for GedcomData { "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level, pointer)), "TRLR" => break, _ => { - println!("{} Unhandled tag {}", dbg(tokenizer), tag); + println!("{} Unhandled tag {}", tokenizer.debug(), tag); tokenizer.next_token(); } }; } else if let Token::CustomTag(tag) = &tokenizer.current_token { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); while tokenizer.current_token != Token::Level(level) { tokenizer.next_token(); } } else { println!( "{} Unhandled token {:?}", - dbg(tokenizer), + tokenizer.debug(), 
tokenizer.current_token ); tokenizer.next_token(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 7dfc354..a3ca781 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,6 +1,8 @@ //! Handles the tokenization of a GEDCOM file use std::str::Chars; +use crate::types::UserDefinedData; + /// The base enum of Token types /// /// making use of [GEDCOM Standard Release 5.5.1](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf), p.11 @@ -152,4 +154,66 @@ impl<'a> Tokenizer<'a> { let not_a_newline = self.current_char != '\n'; (self.current_char.is_whitespace() || is_zero_width_space) && not_a_newline } + + /// Debug function displaying GEDCOM line number of error message. + pub fn debug(&self) -> String { + format!("line {}:", self.line) + } + + /// Grabs and returns to the end of the current line as a String + pub fn take_line_value(&mut self) -> String { + let value: String; + self.next_token(); + + if let Token::LineValue(val) = &self.current_token { + value = val.to_string(); + } else { + panic!( + "{} Expected LineValue, found {:?}", + self.debug(), + self.current_token + ); + } + self.next_token(); + value + } + + /// Takes the value of the current line including handling + /// multi-line values from CONT & CONC tags. + pub fn take_continued_text(&mut self, level: u8) -> String { + let mut value = self.take_line_value(); + + loop { + if let Token::Level(cur_level) = self.current_token { + if cur_level <= level { + break; + } + } + match &self.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONT" => { + value.push('\n'); + value.push_str(&self.take_line_value()) + } + "CONC" => { + value.push(' '); + value.push_str(&self.take_line_value()) + } + _ => panic!("{} Unhandled Continuation Tag: {}", self.debug(), tag), + }, + Token::Level(_) => self.next_token(), + _ => panic!( + "Unhandled Continuation Token: {:?}", + self.current_token + ), + } + } + value + } + + /// parse_custom_tag handles User Defined Data. 
See Gedcom 5.5 spec, p.56 + pub fn parse_custom_tag(&mut self, tag: String) -> UserDefinedData { + let value = self.take_line_value(); + UserDefinedData { tag, value } + } } diff --git a/src/types/address.rs b/src/types/address.rs index e497435..11f37a4 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -5,7 +5,6 @@ use std::fmt; use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - util::{dbg, take_line_value}, }; /// Physical address at which a fact occurs @@ -55,16 +54,16 @@ impl Parser for Address { Token::Tag(tag) => match tag.as_str() { "CONT" | "CONC" => { value.push('\n'); - value.push_str(&take_line_value(tokenizer)); + value.push_str(&tokenizer.take_line_value()); } - "ADR1" => self.adr1 = Some(take_line_value(tokenizer)), - "ADR2" => self.adr2 = Some(take_line_value(tokenizer)), - "ADR3" => self.adr3 = Some(take_line_value(tokenizer)), - "CITY" => self.city = Some(take_line_value(tokenizer)), - "STAE" => self.state = Some(take_line_value(tokenizer)), - "POST" => self.post = Some(take_line_value(tokenizer)), - "CTRY" => self.country = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled Address Tag: {}", dbg(tokenizer), tag), + "ADR1" => self.adr1 = Some(tokenizer.take_line_value()), + "ADR2" => self.adr2 = Some(tokenizer.take_line_value()), + "ADR3" => self.adr3 = Some(tokenizer.take_line_value()), + "CITY" => self.city = Some(tokenizer.take_line_value()), + "STAE" => self.state = Some(tokenizer.take_line_value()), + "POST" => self.post = Some(tokenizer.take_line_value()), + "CTRY" => self.country = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Address Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Address Token: {:?}", tokenizer.current_token), diff --git a/src/types/copyright.rs b/src/types/copyright.rs index ed236dd..772738a 100644 --- a/src/types/copyright.rs +++ b/src/types/copyright.rs @@ -4,7 +4,6 @@ use serde::{Deserialize, Serialize}; use 
crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - util::{dbg, take_line_value}, }; /// A copyright statement, as appropriate for the copyright laws applicable to this data. @@ -29,7 +28,7 @@ impl Copyright { impl Parser for Copyright { /// parse the COPR tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -39,9 +38,9 @@ impl Parser for Copyright { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "CONT" => self.continued = Some(take_line_value(tokenizer)), - "CONC" => self.continued = Some(take_line_value(tokenizer)), - _ => panic!("{} unhandled COPR tag in header: {}", dbg(&tokenizer), tag), + "CONT" => self.continued = Some(tokenizer.take_line_value()), + "CONC" => self.continued = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled COPR tag in header: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled tag in COPR: {:?}", tokenizer.current_token), diff --git a/src/types/corporation.rs b/src/types/corporation.rs index 5e0dd9a..453b161 100644 --- a/src/types/corporation.rs +++ b/src/types/corporation.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::Address, - util::{dbg, take_line_value}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -38,7 +37,7 @@ impl Corporation { impl Parser for Corporation { /// parse is for a CORP tag within the SOUR tag of a HEADER fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -49,11 +48,11 @@ impl Parser for Corporation { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "ADDR" => self.address = 
Some(Address::new(tokenizer, level + 1)), - "PHON" => self.phone = Some(take_line_value(tokenizer)), - "EMAIL" => self.email = Some(take_line_value(tokenizer)), - "FAX" => self.fax = Some(take_line_value(tokenizer)), - "WWW" => self.website = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled CORP tag: {}", dbg(tokenizer), tag), + "PHON" => self.phone = Some(tokenizer.take_line_value()), + "EMAIL" => self.email = Some(tokenizer.take_line_value()), + "FAX" => self.fax = Some(tokenizer.take_line_value()), + "WWW" => self.website = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled CORP tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( diff --git a/src/types/date.rs b/src/types/date.rs index a712505..30f83ac 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -1,7 +1,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - util::{dbg, take_line_value}, }; #[cfg(feature = "json")] @@ -43,7 +42,7 @@ impl Date { impl Parser for Date { /// parse handles the DATE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -53,8 +52,8 @@ impl Parser for Date { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "TIME" => self.time = Some(take_line_value(tokenizer)), - _ => panic!("{} unhandled DATE tag: {}", dbg(tokenizer), tag), + "TIME" => self.time = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled DATE tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected DATE token: {:?}", tokenizer.current_token), @@ -97,7 +96,7 @@ impl Parser for ChangeDate { Token::Tag(tag) => match tag.as_str() { "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - _ => panic!("{} unhandled 
ChangeDate tag: {}", dbg(tokenizer), tag), + _ => panic!("{} unhandled ChangeDate tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected ChangeDate token: {:?}", tokenizer.current_token), diff --git a/src/types/event.rs b/src/types/event.rs index ccd776b..42b5029 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::SourceCitation, - util::{dbg, take_line_value}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -133,10 +132,10 @@ impl Parser for Event { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(take_line_value(tokenizer)), - "PLAC" => self.place = Some(take_line_value(tokenizer)), + "DATE" => self.date = Some(tokenizer.take_line_value()), + "PLAC" => self.place = Some(tokenizer.take_line_value()), "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Event Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Event Token: {:?}", tokenizer.current_token), diff --git a/src/types/family.rs b/src/types/family.rs index 9620bc7..c75fcdc 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{event::HasEvents, Event}, - util::{dbg, take_line_value}, }; #[cfg(feature = "json")] @@ -71,10 +70,10 @@ impl Parser for Family { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "MARR" => self.add_event(Event::new(tokenizer, level + 1, "MARR")), - "HUSB" => self.set_individual1(take_line_value(tokenizer)), - "WIFE" => self.set_individual2(take_line_value(tokenizer)), - "CHIL" => self.add_child(take_line_value(tokenizer)), - _ => panic!("{} Unhandled Family Tag: {}", dbg(tokenizer), tag), + "HUSB" => 
self.set_individual1(tokenizer.take_line_value()), + "WIFE" => self.set_individual2(tokenizer.take_line_value()), + "CHIL" => self.add_child(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Family Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Family Token: {:?}", tokenizer.current_token), diff --git a/src/types/header.rs b/src/types/header.rs index 452abd1..a94f887 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,9 +1,7 @@ -use crate::util::dbg; use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{Copyright, Corporation, Date, Note}, - util::{parse_custom_tag, take_line_value}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -74,21 +72,21 @@ impl Parser for Header { Token::Tag(tag) => match tag.as_str() { "GEDC" => self.gedcom = Some(GedcomDoc::new(tokenizer, 1)), "SOUR" => self.source = Some(HeadSour::new(tokenizer, 1)), - "DEST" => self.destination = Some(take_line_value(tokenizer)), + "DEST" => self.destination = Some(tokenizer.take_line_value()), "DATE" => self.date = Some(Date::new(tokenizer, 1)), - "SUBM" => self.submitter_tag = Some(take_line_value(tokenizer)), - "SUBN" => self.submission_tag = Some(take_line_value(tokenizer)), - "FILE" => self.filename = Some(take_line_value(tokenizer)), + "SUBM" => self.submitter_tag = Some(tokenizer.take_line_value()), + "SUBN" => self.submission_tag = Some(tokenizer.take_line_value()), + "FILE" => self.filename = Some(tokenizer.take_line_value()), "COPR" => self.copyright = Some(Copyright::new(tokenizer, 1)), "CHAR" => self.encoding = Some(Encoding::new(tokenizer, 1)), - "LANG" => self.language = Some(take_line_value(tokenizer)), + "LANG" => self.language = Some(tokenizer.take_line_value()), "NOTE" => self.note = Some(Note::new(tokenizer, 1)), "PLAC" => self.place = Some(HeadPlac::new(tokenizer, 1)), - _ => panic!("{} Unhandled Header Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Header Tag: 
{}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Header Token: {:?}", &tokenizer.current_token), @@ -133,10 +131,10 @@ impl Parser for GedcomDoc { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(take_line_value(tokenizer)), + "VERS" => self.version = Some(tokenizer.take_line_value()), // this is the only value that makes sense. warn them otherwise. "FORM" => { - let form = take_line_value(tokenizer); + let form = tokenizer.take_line_value(); if &form.to_uppercase() != "LINEAGE-LINKED" { println!( "WARNING: Unrecognized GEDCOM form. Expected LINEAGE-LINKED, found {}" @@ -144,12 +142,12 @@ impl Parser for GedcomDoc { } self.form = Some(form); } - _ => panic!("{} Unhandled GEDC Tag: {}", dbg(&tokenizer), tag), + _ => panic!("{} Unhandled GEDC Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( "{} Unexpected GEDC Token: {:?}", - dbg(&tokenizer), + tokenizer.debug(), &tokenizer.current_token ), } @@ -179,7 +177,7 @@ impl Encoding { impl Parser for Encoding { /// parse handles the parsing of the CHARS tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -189,13 +187,13 @@ impl Parser for Encoding { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled CHAR Tag: {}", dbg(&tokenizer), tag), + "VERS" => self.version = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled CHAR Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => 
tokenizer.next_token(), _ => panic!( "{} Unexpected CHAR Token: {:?}", - dbg(&tokenizer), + tokenizer.debug(), &tokenizer.current_token ), } @@ -233,7 +231,7 @@ impl HeadSour { impl Parser for HeadSour { /// parse handles the SOUR tag in a header fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -243,11 +241,11 @@ impl Parser for HeadSour { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(take_line_value(tokenizer)), - "NAME" => self.name = Some(take_line_value(tokenizer)), + "VERS" => self.version = Some(tokenizer.take_line_value()), + "NAME" => self.name = Some(tokenizer.take_line_value()), "CORP" => self.corporation = Some(Corporation::new(tokenizer, level + 1)), "DATA" => self.data = Some(HeadSourData::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled CHAR Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled CHAR Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected SOUR Token: {:?}", tokenizer.current_token), @@ -281,7 +279,7 @@ impl HeadSourData { impl Parser for HeadSourData { /// parse parses the DATA tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -293,7 +291,7 @@ impl Parser for HeadSourData { Token::Tag(tag) => match tag.as_str() { "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), "COPR" => self.copyright = Some(Copyright::new(tokenizer, level + 1)), - _ => panic!("{} unhandled DATA tag in header: {}", dbg(tokenizer), tag), + _ => panic!("{} unhandled DATA tag in header: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( @@ -358,7 
+356,7 @@ impl Parser for HeadPlac { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "FORM" => { - let form = take_line_value(tokenizer); + let form = tokenizer.take_line_value(); let jurisdictional_titles = form.split(","); for t in jurisdictional_titles { @@ -366,7 +364,7 @@ impl Parser for HeadPlac { self.push_jurisdictional_title(v.to_string()); } } - _ => panic!("{} Unhandled PLAC tag in header: {}", dbg(&tokenizer), tag), + _ => panic!("{} Unhandled PLAC tag in header: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( diff --git a/src/types/individual.rs b/src/types/individual.rs index 91c9091..e1f2608 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{event::HasEvents, UserDefinedData, Event, MultimediaRecord, SourceCitation}, - util::{dbg, parse_custom_tag, take_line_value}, }; #[cfg(feature = "json")] @@ -104,18 +103,18 @@ impl Parser for Individual { // assuming it always only has a single DATE subtag tokenizer.next_token(); // level tokenizer.next_token(); // DATE tag - self.last_updated = Some(take_line_value(tokenizer)); + self.last_updated = Some(tokenizer.take_line_value()); } "SOUR" => { self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); } // TODO handle xref "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)), - _ => panic!("{} Unhandled Individual Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Individual Tag: {}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Individual Token: {:?}", tokenizer.current_token), @@ -153,7 +152,7 @@ impl Parser for Gender { "U" => Gender::Unknown, _ => panic!( "{} Unknown 
gender value {} ({})", - dbg(tokenizer), + tokenizer.debug(), gender_string, level ), @@ -191,7 +190,7 @@ pub struct FamilyLink(Xref, FamilyLinkType, Option); impl FamilyLink { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> FamilyLink { - let xref = take_line_value(tokenizer); + let xref = tokenizer.take_line_value(); let link_type = match tag { "FAMC" => FamilyLinkType::Child, "FAMS" => FamilyLinkType::Spouse, @@ -223,8 +222,8 @@ impl Parser for FamilyLink { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "PEDI" => self.set_pedigree(take_line_value(tokenizer).as_str()), - _ => panic!("{} Unhandled FamilyLink Tag: {}", dbg(tokenizer), tag), + "PEDI" => self.set_pedigree(tokenizer.take_line_value().as_str()), + _ => panic!("{} Unhandled FamilyLink Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled FamilyLink Token: {:?}", tokenizer.current_token), @@ -267,7 +266,7 @@ impl Name { impl Parser for Name { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -277,13 +276,13 @@ impl Parser for Name { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "GIVN" => self.given = Some(take_line_value(tokenizer)), - "NPFX" => self.prefix = Some(take_line_value(tokenizer)), - "NSFX" => self.suffix = Some(take_line_value(tokenizer)), - "SPFX" => self.surname_prefix = Some(take_line_value(tokenizer)), - "SURN" => self.surname = Some(take_line_value(tokenizer)), + "GIVN" => self.given = Some(tokenizer.take_line_value()), + "NPFX" => self.prefix = Some(tokenizer.take_line_value()), + "NSFX" => self.suffix = Some(tokenizer.take_line_value()), + "SPFX" => self.surname_prefix = Some(tokenizer.take_line_value()), + "SURN" => self.surname = Some(tokenizer.take_line_value()), "SOUR" => 
self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Name Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Name Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Name Token: {:?}", tokenizer.current_token), diff --git a/src/types/mod.rs b/src/types/mod.rs index 7524546..0450fa9 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -13,6 +13,9 @@ pub use event::{Event, EventType}; pub mod date; pub use date::{ChangeDate, Date}; +mod place; +pub use place::*; + mod address; pub use address::*; diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index db0005c..5cb2f1d 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{Note, SourceCitation, Xref}, - util::{dbg, take_line_value}, }; use super::ChangeDate; @@ -71,18 +70,18 @@ impl Parser for MultimediaRecord { Token::Tag(tag) => match tag.as_str() { "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), - "TITL" => self.title = Some(take_line_value(tokenizer)), + "TITL" => self.title = Some(tokenizer.take_line_value()), "REFN" => { self.user_reference_number = Some(UserReferenceNumber::new(tokenizer, level + 1)) } - "RIN" => self.automated_record_id = Some(take_line_value(tokenizer)), + "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()), "NOTE" => self.note_structure = Some(Note::new(tokenizer, level + 1)), "SOUR" => { self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)) } "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Multimedia Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Multimedia Tag: {}", tokenizer.debug(), tag), }, _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), } @@ -134,8 
+133,8 @@ impl Parser for MultimediaLink { Token::Tag(tag) => match tag.as_str() { "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), - "TITL" => self.title = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled Multimedia Tag: {}", dbg(tokenizer), tag), + "TITL" => self.title = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Multimedia Tag: {}", tokenizer.debug(), tag), }, _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), } @@ -166,7 +165,7 @@ impl MultimediaFileRefn { impl Parser for MultimediaFileRefn { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(curl_level) = &tokenizer.current_token { if curl_level <= &level { @@ -176,11 +175,11 @@ impl Parser for MultimediaFileRefn { tokenizer.next_token(); match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "TITL" => self.title = Some(take_line_value(tokenizer)), + "TITL" => self.title = Some(tokenizer.take_line_value()), "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), _ => panic!( "{} Unhandled MultimediaFileRefn Tag: {}", - dbg(tokenizer), + tokenizer.debug(), tag ), }, @@ -218,7 +217,7 @@ impl MultimediaFormat { impl Parser for MultimediaFormat { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(curl_level) = &tokenizer.current_token { if curl_level <= &level { @@ -228,8 +227,8 @@ impl Parser for MultimediaFormat { tokenizer.next_token(); match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "TYPE" => self.source_media_type = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled MultimediaFormat Tag: {}", dbg(tokenizer), tag), + 
"TYPE" => self.source_media_type = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled MultimediaFormat Tag: {}", tokenizer.debug(), tag), }, _ => panic!( "Unhandled MultimediaFormat Token: {:?}", @@ -264,7 +263,7 @@ impl UserReferenceNumber { impl Parser for UserReferenceNumber { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(curl_level) = &tokenizer.current_token { @@ -274,10 +273,10 @@ impl Parser for UserReferenceNumber { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "TYPE" => self.user_reference_type = Some(take_line_value(tokenizer)), + "TYPE" => self.user_reference_type = Some(tokenizer.take_line_value()), _ => panic!( "{} Unhandled UserReferenceNumber Tag: {}", - dbg(tokenizer), + tokenizer.debug(), tag ), }, diff --git a/src/types/note.rs b/src/types/note.rs index 4228ea7..ae3002a 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -2,8 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{Source, Translation}, - util::dbg, - util::take_line_value, }; #[cfg(feature = "json")] @@ -52,7 +50,7 @@ impl Parser for Note { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { let mut value = String::new(); - value.push_str(&take_line_value(tokenizer)); + value.push_str(&tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -63,15 +61,15 @@ impl Parser for Note { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MIME" => self.mime = Some(take_line_value(tokenizer)), + "MIME" => self.mime = Some(tokenizer.take_line_value()), "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), - "LANG" => self.language = Some(take_line_value(tokenizer)), - "CONC" => value.push_str(&take_line_value(tokenizer)), + "LANG" => self.language = Some(tokenizer.take_line_value()), + 
"CONC" => value.push_str(&tokenizer.take_line_value()), "CONT" => { value.push('\n'); - value.push_str(&take_line_value(tokenizer)); + value.push_str(&tokenizer.take_line_value()); } - _ => panic!("{} unhandled NOTE tag: {}", dbg(&tokenizer), tag), + _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), diff --git a/src/types/place.rs b/src/types/place.rs index c74812a..407ae0e 100644 --- a/src/types/place.rs +++ b/src/types/place.rs @@ -1,4 +1,3 @@ -use crate::types::{Address, Date, Note}; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; diff --git a/src/types/repository.rs b/src/types/repository.rs index 0712f88..ac72e5f 100644 --- a/src/types/repository.rs +++ b/src/types/repository.rs @@ -1,7 +1,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - util::{dbg, take_line_value}, }; use super::{Address, Xref}; @@ -45,9 +44,9 @@ impl Parser for Repository { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "NAME" => self.name = Some(take_line_value(tokenizer)), + "NAME" => self.name = Some(tokenizer.take_line_value()), "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Repository Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Repository Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Repository Token: {:?}", tokenizer.current_token), @@ -69,7 +68,7 @@ pub struct RepoCitation { impl RepoCitation { pub fn new(tokenizer: &mut Tokenizer, level: u8) -> RepoCitation { let mut rc = RepoCitation { - xref: take_line_value(tokenizer), + xref: tokenizer.take_line_value(), call_number: None, }; rc.parse(tokenizer, level); @@ -87,8 +86,8 @@ impl Parser for RepoCitation { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "CALN" => self.call_number = 
Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled RepoCitation Tag: {}", dbg(tokenizer), tag), + "CALN" => self.call_number = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled RepoCitation Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( diff --git a/src/types/source.rs b/src/types/source.rs index 407fd5e..77c8ac8 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{Event, RepoCitation, UserDefinedData}, - util::{dbg, take_continued_text, take_line_value, parse_custom_tag}, }; #[cfg(feature = "json")] @@ -58,16 +57,16 @@ impl Parser for Source { Token::Tag(tag) => match tag.as_str() { "DATA" => tokenizer.next_token(), "EVEN" => { - let events_recorded = take_line_value(tokenizer); + let events_recorded = tokenizer.take_line_value(); let mut event = Event::new(tokenizer, level + 2, "OTHER"); event.with_source_data(events_recorded); self.data.add_event(event); } - "AGNC" => self.data.agency = Some(take_line_value(tokenizer)), - "ABBR" => self.abbreviation = Some(take_continued_text(tokenizer, level + 1)), - "TITL" => self.title = Some(take_continued_text(tokenizer, level + 1)), + "AGNC" => self.data.agency = Some(tokenizer.take_line_value()), + "ABBR" => self.abbreviation = Some(tokenizer.take_continued_text(level + 1)), + "TITL" => self.title = Some(tokenizer.take_continued_text(level + 1)), "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Source Tag: {}", dbg(tokenizer), tag), + _ => panic!("{} Unhandled Source Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Source Token: {:?}", tokenizer.current_token), @@ -105,7 +104,7 @@ impl SourceCitation { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8) -> SourceCitation { let mut citation = SourceCitation { - xref: take_line_value(tokenizer), + 
xref: tokenizer.take_line_value(), page: None, custom_data: Vec::new(), }; @@ -128,12 +127,12 @@ impl Parser for SourceCitation { } match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "PAGE" => self.page = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled Citation Tag: {}", dbg(tokenizer), tag), + "PAGE" => self.page = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Citation Tag: {}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)) + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Citation Token: {:?}", tokenizer.current_token), diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 41c655a..8fa7f72 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -2,7 +2,6 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note}, - util::{dbg, parse_custom_tag, take_line_value}, }; #[cfg(feature = "json")] @@ -80,21 +79,21 @@ impl Parser for Submitter { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "NAME" => self.name = Some(take_line_value(tokenizer)), + "NAME" => self.name = Some(tokenizer.take_line_value()), "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), "OBJE" => { self.add_multimedia(MultimediaLink::new(tokenizer, level + 1, pointer)) } - "LANG" => self.language = Some(take_line_value(tokenizer)), + "LANG" => self.language = Some(tokenizer.take_line_value()), "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - "PHON" => self.phone = Some(take_line_value(tokenizer)), - _ => panic!("{} Unhandled Submitter Tag: {}", dbg(tokenizer), tag), + "PHON" => self.phone = Some(tokenizer.take_line_value()), + _ => panic!("{} 
Unhandled Submitter Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), Token::CustomTag(tag) => { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); } _ => panic!("Unhandled Submitter Token: {:?}", tokenizer.current_token), } diff --git a/src/types/translation.rs b/src/types/translation.rs index 84995cf..15f1d87 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -4,8 +4,6 @@ use serde::{Deserialize, Serialize}; use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - util::dbg, - util::take_line_value, }; /// Translation (tag:TRAN) is a type of TRAN for unstructured human-readable text, such as @@ -36,7 +34,7 @@ impl Parser for Translation { ///parse handles the TRAN tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(take_line_value(tokenizer)); + self.value = Some(tokenizer.take_line_value()); loop { if let Token::Level(cur_level) = tokenizer.current_token { @@ -47,9 +45,9 @@ impl Parser for Translation { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MIME" => self.mime = Some(take_line_value(tokenizer)), - "LANG" => self.language = Some(take_line_value(tokenizer)), - _ => panic!("{} unhandled NOTE tag: {}", dbg(&tokenizer), tag), + "MIME" => self.mime = Some(tokenizer.take_line_value()), + "LANG" => self.language = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), diff --git a/src/util.rs b/src/util.rs index 0e6a2d4..65eb740 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,8 +1,3 @@ -use crate::{ - tokenizer::{Token, Tokenizer}, - types::UserDefinedData, -}; - /// Macro for displaying `Option`s in debug mode without the text wrapping. #[macro_export] macro_rules! 
fmt_optional_value { @@ -14,68 +9,3 @@ macro_rules! fmt_optional_value { } }; } - -/// Debug function displaying GEDCOM line number of error message. -pub fn dbg(tokenizer: &Tokenizer) -> String { - format!("line {}:", tokenizer.line) -} - -/// Grabs and returns to the end of the current line as a String -pub fn take_line_value(tokenizer: &mut Tokenizer) -> String { - let value: String; - tokenizer.next_token(); - - if let Token::LineValue(val) = &tokenizer.current_token { - value = val.to_string(); - } else { - panic!( - "{} Expected LineValue, found {:?}", - dbg(&tokenizer), - tokenizer.current_token - ); - } - tokenizer.next_token(); - value -} - -pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> UserDefinedData { - let value = take_line_value(tokenizer); - UserDefinedData { tag, value } -} - -/// Takes the value of the current line including handling -/// multi-line values from CONT & CONC tags. -pub fn take_continued_text(tokenizer: &mut Tokenizer, level: u8) -> String { - let mut value = take_line_value(tokenizer); - - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" => { - value.push('\n'); - value.push_str(&take_line_value(tokenizer)) - } - "CONC" => { - value.push(' '); - value.push_str(&take_line_value(tokenizer)) - } - _ => panic!( - "{} Unhandled Continuation Tag: {}", - dbg(tokenizer), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled Continuation Token: {:?}", - tokenizer.current_token - ), - } - } - value -} From dba368c608ff041aac853b0cf0204c5c48babfce Mon Sep 17 00:00:00 2001 From: ge3224 Date: Thu, 24 Nov 2022 23:17:38 -0600 Subject: [PATCH 32/55] Format tokenizer and copyright --- src/tokenizer.rs | 5 +---- src/types/copyright.rs | 6 +++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 
a3ca781..ee772a4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -202,10 +202,7 @@ impl<'a> Tokenizer<'a> { _ => panic!("{} Unhandled Continuation Tag: {}", self.debug(), tag), }, Token::Level(_) => self.next_token(), - _ => panic!( - "Unhandled Continuation Token: {:?}", - self.current_token - ), + _ => panic!("Unhandled Continuation Token: {:?}", self.current_token), } } value diff --git a/src/types/copyright.rs b/src/types/copyright.rs index 772738a..ebc87a3 100644 --- a/src/types/copyright.rs +++ b/src/types/copyright.rs @@ -40,7 +40,11 @@ impl Parser for Copyright { Token::Tag(tag) => match tag.as_str() { "CONT" => self.continued = Some(tokenizer.take_line_value()), "CONC" => self.continued = Some(tokenizer.take_line_value()), - _ => panic!("{} unhandled COPR tag in header: {}", tokenizer.debug(), tag), + _ => panic!( + "{} unhandled COPR tag in header: {}", + tokenizer.debug(), + tag + ), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled tag in COPR: {:?}", tokenizer.current_token), From 123566e0475a185c38ab97ce398ef26ddbaadf1c Mon Sep 17 00:00:00 2001 From: ge3224 Date: Thu, 24 Nov 2022 23:19:26 -0600 Subject: [PATCH 33/55] Handle more tags in SourceCitation and Event structures --- src/types/date.rs | 2 +- src/types/event.rs | 12 +- src/types/family.rs | 83 ++++++++++- src/types/header.rs | 185 ++++++++++++++++++++++- src/types/individual.rs | 11 +- src/types/multimedia.rs | 165 ++++++++++++++++++++- src/types/note.rs | 35 ++++- src/types/repository.rs | 2 +- src/types/source.rs | 309 ++++++++++++++++++++++++++++++++++++++- src/types/translation.rs | 2 +- tests/header.rs | 287 ------------------------------------ tests/multimedia.rs | 117 --------------- 12 files changed, 782 insertions(+), 428 deletions(-) delete mode 100644 tests/header.rs delete mode 100644 tests/multimedia.rs diff --git a/src/types/date.rs b/src/types/date.rs index 30f83ac..209cf15 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -9,7 +9,7 @@ use 
serde::{Deserialize, Serialize}; use super::Note; /// TODO Date should encompasses a number of date formats, e.g. approximated, period, phrase and range. -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Date { pub value: Option, diff --git a/src/types/event.rs b/src/types/event.rs index 42b5029..91b69e5 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,7 +1,7 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - types::SourceCitation, + types::{ChildToFamilyLink, Note, SourceCitation}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -37,6 +37,8 @@ pub struct Event { pub event: EventType, pub date: Option, pub place: Option, + pub note: Option, + pub child_to_family_link: Option, pub citations: Vec, } @@ -47,6 +49,8 @@ impl Event { event: Self::from_tag(tag), date: None, place: None, + note: None, + child_to_family_link: None, citations: Vec::new(), }; event.parse(tokenizer, level); @@ -120,7 +124,6 @@ pub trait HasEvents { impl Parser for Event { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - tokenizer.next_token(); loop { @@ -135,6 +138,11 @@ impl Parser for Event { "DATE" => self.date = Some(tokenizer.take_line_value()), "PLAC" => self.place = Some(tokenizer.take_line_value()), "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), + "FAMC" => { + self.child_to_family_link = + Some(ChildToFamilyLink::new(tokenizer, level + 1)) + }, + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), diff --git a/src/types/family.rs b/src/types/family.rs index c75fcdc..ee5611f 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,7 +1,7 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, Event}, + types::{event::HasEvents, Event, Note}, }; 
#[cfg(feature = "json")] @@ -96,3 +96,84 @@ impl HasEvents for Family { self.events.clone() } } + +/// ChildToFamilyLink ...TODO +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 NAME given name /surname/jr. +/// 1 BIRT +/// 2 DATE 31 DEC 1997 +/// 2 PLAC The place +/// 2 FAMC @PARENTS@ +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// +/// assert_eq!(data.individuals[0].events[0].child_to_family_link.as_ref().unwrap().xref.as_ref().unwrap(), "@PARENTS@"); +/// +/// ``` +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct ChildToFamilyLink { + pub xref: Option, + pub pedigree_linkage_type: Option, + pub child_linkage_status: Option, + pub note: Option, +} + +impl ChildToFamilyLink { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> ChildToFamilyLink { + let mut famc = ChildToFamilyLink { + xref: None, + pedigree_linkage_type: None, + child_linkage_status: None, + note: None, + }; + famc.parse(tokenizer, level); + famc + } +} + +impl Parser for ChildToFamilyLink { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + self.xref = Some(tokenizer.take_line_value()); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + tokenizer.next_token(); + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "PEDI" => self.pedigree_linkage_type = Some(tokenizer.take_line_value()), + "STAT" => self.child_linkage_status = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + _ => panic!( + "{} unhandled ChildToFamilyLink tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unhandled ChildToFamilyLink Token: {:?}", + 
tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/header.rs b/src/types/header.rs index a94f887..764171d 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -10,6 +10,57 @@ use super::UserDefinedData; /// Header (tag: HEAD) containing GEDCOM metadata. /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 DEST Destination of transmission\n\ +/// 1 DATE 1 JAN 1998\n\ +/// 2 TIME 13:57:24.80\n\ +/// 1 SUBM @SUBMITTER@\n\ +/// 1 SUBN @SUBMISSION@\n\ +/// 1 FILE ALLGED.GED\n\ +/// 1 COPR (C) 1997-2000 by H. Eichmann.\n\ +/// 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ +/// 1 LANG language +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// +/// let header = data.header.unwrap(); +/// assert_eq!(header.gedcom.unwrap().version.unwrap(), "5.5"); +/// +/// assert_eq!( +/// header.destination.unwrap(), +/// "Destination of transmission" +/// ); +/// +/// let date = header.date.unwrap(); +/// assert_eq!(date.value.unwrap(), "1 JAN 1998"); +/// assert_eq!(date.time.unwrap(), "13:57:24.80"); +/// +/// let subm = header.submission_tag.unwrap(); +/// assert_eq!(subm, "@SUBMISSION@"); +/// +/// let file = header.filename.unwrap(); +/// assert_eq!(file, "ALLGED.GED"); +/// +/// let copr = header.copyright.unwrap(); +/// assert_eq!(copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); +/// assert_eq!( +/// copr.continued.unwrap(), +/// "You can use and distribute this file freely as long as you do not charge for it." 
+/// ); +/// +/// let lang = header.language.unwrap(); +/// assert_eq!(lang.as_str(), "language"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Header { @@ -87,7 +138,7 @@ impl Parser for Header { Token::CustomTag(tag) => { let tag_clone = tag.clone(); self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) - }, + } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Header Token: {:?}", &tokenizer.current_token), } @@ -98,6 +149,25 @@ impl Parser for Header { /// GedcomDoc (tag: GEDC) is a container for information about the entire document. It is /// recommended that applications write GEDC with its required subrecord VERS as the first /// substructure of a HEAD. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#GEDC +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// +/// let head_gedc = data.header.unwrap().gedcom.unwrap(); +/// assert_eq!(head_gedc.version.unwrap(), "5.5"); +/// assert_eq!(head_gedc.form.unwrap(), "LINEAGE-LINKED"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct GedcomDoc { @@ -157,6 +227,29 @@ impl Parser for GedcomDoc { /// Encoding (tag: CHAR) is a code value that represents the character set to be used to /// interpret this data. See Gedcom 5.5.1 specification, p. 
44 +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 CHAR ASCII\n\ +/// 2 VERS Version number of ASCII (whatever it means)\n\ +/// 0 TRLR"; + +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); + +/// let h_char = data.header.unwrap().encoding.unwrap(); +/// assert_eq!(h_char.value.unwrap(), "ASCII"); +/// assert_eq!( +/// h_char.version.unwrap(), +/// "Version number of ASCII (whatever it means)" +/// ); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Encoding { @@ -205,6 +298,32 @@ impl Parser for Encoding { /// registration process for these identifiers existed for a time, but no longer does. If an /// existing identifier is known, it should be used. Otherwise, a URI owned by the product should /// be used instead. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 SOUR SOURCE_NAME\n\ +/// 2 VERS Version number of source-program\n\ +/// 2 NAME Name of source-program\n\ +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// +/// let sour = data.header.unwrap().source.unwrap(); +/// assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); +/// +/// let vers = sour.version.unwrap(); +/// assert_eq!(vers, "Version number of source-program"); +/// +/// let name = sour.name.unwrap(); +/// assert_eq!(name, "Name of source-program"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct HeadSour { @@ -257,6 +376,35 @@ impl Parser for HeadSour { /// The electronic data source or digital repository from which this dataset was exported. 
The /// payload is the name of that source, with substructures providing additional details about the /// source (not the export). See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-SOUR-DATA +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 SOUR SOURCE_NAME\n\ +/// 2 DATA Name of source data\n\ +/// 3 DATE 1 JAN 1998\n\ +/// 3 COPR Copyright of source data\n\ +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// +/// let sour = data.header.unwrap().source.unwrap(); +/// assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); +/// +/// let sour_data = sour.data.unwrap(); +/// assert_eq!(sour_data.value.unwrap(), "Name of source data"); +/// assert_eq!(sour_data.date.unwrap().value.unwrap(), "1 JAN 1998"); +/// assert_eq!( +/// sour_data.copyright.unwrap().value.unwrap(), +/// "Copyright of source data" +/// ); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct HeadSourData { @@ -291,7 +439,11 @@ impl Parser for HeadSourData { Token::Tag(tag) => match tag.as_str() { "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), "COPR" => self.copyright = Some(Copyright::new(tokenizer, level + 1)), - _ => panic!("{} unhandled DATA tag in header: {}", tokenizer.debug(), tag), + _ => panic!( + "{} unhandled DATA tag in header: {}", + tokenizer.debug(), + tag + ), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( @@ -305,6 +457,28 @@ impl Parser for HeadSourData { /// HeadPlace (tag: PLAC) is is a placeholder for providing a default PLAC.FORM, and must not have /// a payload. 
See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 PLAC\n\ +/// 2 FORM City, County, State, Country\n\ +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// +/// let h_plac = data.header.unwrap().place.unwrap(); +/// assert_eq!(h_plac.form[0], "City"); +/// assert_eq!(h_plac.form[1], "County"); +/// assert_eq!(h_plac.form[2], "State"); +/// assert_eq!(h_plac.form[3], "Country"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct HeadPlac { @@ -343,7 +517,6 @@ impl HeadPlac { impl Parser for HeadPlac { /// parse handles the PLAC tag when present in header fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - // In the header, PLAC should have no payload. See // https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC tokenizer.next_token(); @@ -364,7 +537,11 @@ impl Parser for HeadPlac { self.push_jurisdictional_title(v.to_string()); } } - _ => panic!("{} Unhandled PLAC tag in header: {}", tokenizer.debug(), tag), + _ => panic!( + "{} Unhandled PLAC tag in header: {}", + tokenizer.debug(), + tag + ), }, Token::Level(_) => tokenizer.next_token(), _ => panic!( diff --git a/src/types/individual.rs b/src/types/individual.rs index e1f2608..279334d 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,7 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, UserDefinedData, Event, MultimediaRecord, SourceCitation}, + types::{event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData}, }; #[cfg(feature = "json")] @@ -21,7 +21,7 @@ pub struct Individual { pub last_updated: Option, pub source: Vec, pub multimedia: Vec, - events: Vec, + pub events: Vec, } impl Individual { @@ -109,7 +109,9 @@ 
impl Parser for Individual { self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); } // TODO handle xref - "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)), + "OBJE" => { + self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)) + } _ => panic!("{} Unhandled Individual Tag: {}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { @@ -240,6 +242,7 @@ pub struct Name { pub surname: Option, pub prefix: Option, pub surname_prefix: Option, + pub note: Option, pub suffix: Option, pub source: Vec, } @@ -252,6 +255,7 @@ impl Name { surname: None, prefix: None, surname_prefix: None, + note: None, suffix: None, source: Vec::new(), }; @@ -282,6 +286,7 @@ impl Parser for Name { "SPFX" => self.surname_prefix = Some(tokenizer.take_line_value()), "SURN" => self.surname = Some(tokenizer.take_line_value()), "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Name Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 5cb2f1d..255f091 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -19,6 +19,85 @@ use super::ChangeDate; /// The change and creation dates should be for the OBJE record itself, not the underlying files. /// /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#MULTIMEDIA_RECORD. +/// +/// # Example +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @MEDIA1@ OBJE\n\ +/// 1 FILE /home/user/media/file_name.bmp\n\ +/// 2 FORM bmp\n\ +/// 3 TYPE photo +/// 2 TITL A Bitmap\n\ +/// 1 REFN 000\n\ +/// 2 TYPE User Reference Type\n\ +/// 1 RIN Automated Id\n\ +/// 1 NOTE A note\n\ +/// 2 CONT Note continued here. 
The word TE\n\ +/// 2 CONC ST should not be broken!\n\ +/// 1 SOUR @SOUR1@\n\ +/// 2 PAGE 42 +/// 2 _CUSTOM Custom data\n\ +/// 1 CHAN +/// 2 DATE 1 APR 1998 +/// 3 TIME 12:34:56.789 +/// 2 NOTE A note +/// 3 CONT Note continued here. The word TE +/// 3 CONC ST should not be broken! +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let obje = &data.multimedia[0]; +/// assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); +/// +/// let file = obje.file.as_ref().unwrap(); +/// assert_eq!( +/// file.value.as_ref().unwrap(), +/// "/home/user/media/file_name.bmp" +/// ); +/// +/// assert_eq!(file.title.as_ref().unwrap(), "A Bitmap"); +/// +/// let form = file.form.as_ref().unwrap(); +/// assert_eq!(form.value.as_ref().unwrap(), "bmp"); +/// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); +/// +/// let user_ref = obje.user_reference_number.as_ref().unwrap(); +/// assert_eq!(user_ref.value.as_ref().unwrap(), "000"); +/// assert_eq!( +/// user_ref.user_reference_type.as_ref().unwrap(), +/// "User Reference Type" +/// ); +/// +/// assert_eq!(obje.automated_record_id.as_ref().unwrap(), "Automated Id"); +/// +/// let note = obje.note_structure.as_ref().unwrap(); +/// assert_eq!( +/// note.value.as_ref().unwrap(), +/// "A note\nNote continued here. The word TEST should not be broken!" 
+/// ); +/// +/// let sour = obje.source_citation.as_ref().unwrap(); +/// assert_eq!(sour.xref, "@SOUR1@"); +/// assert_eq!(sour.page.as_ref().unwrap(), "42"); +/// assert_eq!(sour.custom_data.len(), 1); +/// assert_eq!(sour.custom_data[0].value, "Custom data"); +/// +/// let chan = obje.change_date.as_ref().unwrap(); +/// let date = chan.date.as_ref().unwrap(); +/// assert_eq!(date.value.as_ref().unwrap(), "1 APR 1998"); +/// assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); +/// +/// let chan_note = chan.note.as_ref().unwrap(); +/// assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. The word TEST should not be broken!"); +/// ``` pub struct MultimediaRecord { /// Optional reference to link to this submitter pub xref: Option, @@ -89,9 +168,45 @@ impl Parser for MultimediaRecord { } } +/// MultimediaLink... TODO +/// +/// # Example +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 CHAR UTF-8\n\ +/// 1 SOUR Ancestry.com Family Trees\n\ +/// 2 VERS (2010.3)\n\ +/// 2 NAME Ancestry.com Family Trees\n\ +/// 2 CORP Ancestry.com\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 OBJE\n\ +/// 1 FILE http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1\n\ +/// 1 FORM jpg\n\ +/// 1 TITL In Prague\n\ +/// 0 TRLR"; +/// +/// let mut record = GedcomRecord::new(sample.chars()); +/// let data = record.parse_record(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let obje = &data.multimedia[0]; +/// assert_eq!(obje.title.as_ref().unwrap(), "In Prague"); +/// +/// let form = obje.form.as_ref().unwrap(); +/// assert_eq!(form.value.as_ref().unwrap(), "jpg"); +/// +/// let file = obje.file.as_ref().unwrap(); +/// assert_eq!( +/// file.value.as_ref().unwrap(), +/// "http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1" +/// ); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", 
derive(Serialize, Deserialize))] -/// MultimediaLink pub struct MultimediaLink { /// Optional reference to link to this submitter pub xref: Option, @@ -142,12 +257,54 @@ impl Parser for MultimediaLink { } } -#[derive(Debug, Default)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] - /// A complete local or remote file reference to the auxiliary data to be linked to the GEDCOM /// context. Remote reference would include a network address where the multimedia data may /// be obtained. +/// # Example +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @MEDIA1@ OBJE\n\ +/// 1 FILE /home/user/media/file_name.bmp\n\ +/// 2 FORM bmp\n\ +/// 3 TYPE photo +/// 2 TITL A Bitmap\n\ +/// 1 REFN 000\n\ +/// 2 TYPE User Reference Type\n\ +/// 0 TRLR"; +/// +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let obje = &data.multimedia[0]; +/// assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); +/// +/// let file = obje.file.as_ref().unwrap(); +/// assert_eq!( +/// file.value.as_ref().unwrap(), +/// "/home/user/media/file_name.bmp" +/// ); +/// +/// assert_eq!(file.title.as_ref().unwrap(), "A Bitmap"); +/// +/// let form = file.form.as_ref().unwrap(); +/// assert_eq!(form.value.as_ref().unwrap(), "bmp"); +/// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); +/// +/// let user_ref = obje.user_reference_number.as_ref().unwrap(); +/// assert_eq!(user_ref.value.as_ref().unwrap(), "000"); +/// assert_eq!( +/// user_ref.user_reference_type.as_ref().unwrap(), +/// "User Reference Type" +/// ); +/// ``` +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaFileRefn { pub value: Option, pub title: Option, diff --git a/src/types/note.rs b/src/types/note.rs index ae3002a..2da582a 100644 --- a/src/types/note.rs +++ 
b/src/types/note.rs @@ -15,7 +15,40 @@ use serde::{Deserialize, Serialize}; /// information the document contains. /// /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#NOTE -#[derive(Debug, Default)] +/// +/// # Example +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 NOTE A general note about this file:\n\ +/// 2 CONT It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1:\n\ +/// 2 CONT His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given),\n\ +/// 2 CONT adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7).\n\ +/// 2 CONT In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used.\n\ +/// 2 CONT All other individuals/families contain no data. Note, that many data tags can appear more than once\n\ +/// 2 CONT (in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'.\n\ +/// 2 CONT The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation\n\ +/// 2 CONT of this file and will change in future Versions!\n\ +/// 2 CONT This file is created by H. Eichmann: h.eichmann@@gmx.de. Feel free to copy and use it for any\n\ +/// 2 CONT non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used.\n\ +/// 2 CONT Copyright: gedcom@@gedcom.org\n\ +/// 2 CONT Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom.\n\ +/// 2 CONT Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit).\n\ +/// 2 CONT This @@ (commercial at) character may only appear ONCE!\n\ +/// 2 CONT Note continued here. 
The word TE\n\ +/// 2 CONC ST should not be broken!\n\ +/// 0 TRLR"; + +/// let mut parser = GedcomRecord::new(sample.chars()); +/// let data = parser.parse_record(); + +/// let note = data.header.unwrap().note.unwrap(); +/// assert_eq!(note.value.unwrap().chars().count(), 1440); +/// ``` +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Note { pub value: Option, diff --git a/src/types/repository.rs b/src/types/repository.rs index ac72e5f..13b8b87 100644 --- a/src/types/repository.rs +++ b/src/types/repository.rs @@ -56,7 +56,7 @@ impl Parser for Repository { } /// Citation linking a `Source` to a data `Repository` -#[derive(Debug)] +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct RepoCitation { /// Reference to the `Repository` diff --git a/src/types/source.rs b/src/types/source.rs index 77c8ac8..7aeeae5 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,15 +1,15 @@ use crate::{ parser::Parser, tokenizer::{Token, Tokenizer}, - types::{Event, RepoCitation, UserDefinedData}, + types::{Date, Event, Note, RepoCitation, UserDefinedData}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::{Xref}; +use super::Xref; -#[derive(Debug)] +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] /// Source for genealogy facts pub struct Source { @@ -76,7 +76,7 @@ impl Parser for Source { } #[allow(clippy::module_name_repetitions)] -#[derive(Debug)] +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct SourceData { events: Vec, @@ -89,7 +89,29 @@ impl SourceData { } } -/// Citation linking a genealogy fact to a data `Source` +/// The data provided in the `SourceCitation` structure is source-related information specific to +/// the data being cited. (See GEDCOM 5.5 Specification page 39.) 
+/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 SOUR @SOURCE1@\n\ +/// 2 PAGE 42\n\ +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// +/// assert_eq!(data.individuals[0].source[0].xref, "@SOURCE1@"); +/// assert_eq!(data.individuals[0].source[0].page.as_ref().unwrap(), "42"); +/// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct SourceCitation { @@ -97,6 +119,9 @@ pub struct SourceCitation { pub xref: Xref, /// Page number of source pub page: Option, + pub data: Option, + pub note: Option, + pub certainty_assessment: Option, pub custom_data: Vec, } @@ -106,6 +131,9 @@ impl SourceCitation { let mut citation = SourceCitation { xref: tokenizer.take_line_value(), page: None, + data: None, + note: None, + certainty_assessment: None, custom_data: Vec::new(), }; citation.parse(tokenizer, level); @@ -125,10 +153,21 @@ impl Parser for SourceCitation { break; } } + match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PAGE" => self.page = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Citation Tag: {}", tokenizer.debug(), tag), + "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "QUAY" => { + self.certainty_assessment = + Some(CertaintyAssessment::new(tokenizer, level + 1)) + } + _ => panic!( + "{} Unhandled SourceCitation Tag: {}", + tokenizer.debug(), + tag + ), }, Token::CustomTag(tag) => { let tag_clone = tag.clone(); @@ -140,3 +179,261 @@ impl Parser for SourceCitation { } } } + +/// SourceCitationData is a substructure of SourceCitation, associated with the SOUR.DATA tag. 
+/// Actual text from the source that was used in making assertions, for example a date phrase as +/// actually recorded in the source, or significant notes written by the recorder, or an applicable +/// sentence from a letter. This is stored in the SOUR.DATA.TEXT context. +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 SOUR @SOURCE1@\n\ +/// 2 PAGE 42\n\ +/// 2 DATA\n\ +/// 3 DATE BEF 1 JAN 1900\n\ +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// let citation_data = data.individuals[0].source[0].data.as_ref().unwrap(); +/// +/// assert_eq!( +/// citation_data.date.as_ref().unwrap().value.as_ref().unwrap(), +/// "BEF 1 JAN 1900" +/// ); +/// ``` +#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct SourceCitationData { + pub date: Option, + pub text: Option, +} + +impl SourceCitationData { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> SourceCitationData { + let mut data = SourceCitationData { + date: None, + text: None, + }; + data.parse(tokenizer, level); + data + } +} + +impl Parser for SourceCitationData { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip because this DATA tag should have now line value + tokenizer.next_token(); + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + + tokenizer.next_token(); + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "TEXT" => self.text = Some(TextFromSource::new(tokenizer, level + 1)), + _ => panic!( + "{} unhandled SourceCitationData tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unexpected SourceCitationData token: 
{:?}", + tokenizer.current_token + ), + } + } + } + } +} + +/// A verbatim copy of any description contained within the source. This indicates notes or text +/// that are actually contained in the source document, not the submitter's opinion about the +/// source. This should be, from the evidence point of view, "what the original record keeper said" +/// as opposed to the researcher's interpretation. The word TEXT, in this case, means from the text +/// which appeared in the source record including labels. +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 SOUR @SOURCE1@\n\ +/// 2 PAGE 42\n\ +/// 2 DATA\n\ +/// 3 DATE BEF 1 JAN 1900\n\ +/// 3 TEXT a sample text\n\ +/// 4 CONT Sample text continued here. The word TE\n\ +/// 4 CONC ST should not be broken!\n\ +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// let citation_data = data.individuals[0].source[0].data.as_ref().unwrap(); +/// +/// assert_eq!( +/// citation_data.text.as_ref().unwrap().value.as_ref().unwrap(), +/// "a sample text\nSample text continued here. The word TEST should not be broken!" 
+/// ); +/// ``` +#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct TextFromSource { + pub value: Option, +} + +impl TextFromSource { + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> TextFromSource { + let mut text = TextFromSource { value: None }; + text.parse(tokenizer, level); + text + } +} + +impl Parser for TextFromSource { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + let mut value = String::new(); + value.push_str(&tokenizer.take_line_value()); + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + + tokenizer.next_token(); + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "CONC" => value.push_str(&tokenizer.take_line_value()), + "CONT" => { + value.push('\n'); + value.push_str(&tokenizer.take_line_value()); + } + _ => panic!( + "{} unhandled TextFromSource tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "Unexpected TextFromSource token: {:?}", + &tokenizer.current_token + ), + } + } + } + + if value != "" { + self.value = Some(value); + } + } +} + +/// The QUAY tag's value conveys the submitter's quantitative evaluation of the credibility of a +/// piece of information, based upon its supporting evidence. Some systems use this feature to rank +/// multiple conflicting opinions for display of most likely information first. It is not intended +/// to eliminate the receiver's need to evaluate the evidence for themselves. 
+/// +/// 0 = Unreliable evidence or estimated data +/// 1 = Questionable reliability of evidence (interviews, census, oral genealogies, or potential for bias for example, an autobiography) +/// 2 = Secondary evidence, data officially recorded sometime after event +/// 3 = Direct and primary evidence used, or by dominance of the evidence +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomRecord; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 SOUR @SOURCE1@\n\ +/// 2 PAGE 42\n\ +/// 2 QUAY 1 +/// 0 TRLR"; +/// +/// let mut ged = GedcomRecord::new(sample.chars()); +/// let data = ged.parse_record(); +/// let quay = data.individuals[0].source[0].certainty_assessment.as_ref().unwrap(); +/// +/// assert_eq!( +/// quay.get_int().unwrap(), +/// 1 +/// ); +/// ``` +#[derive(Clone, Debug)] +pub enum CertaintyAssessment { + Unreliable, + Questionable, + Secondary, + Direct, + None, +} + +impl CertaintyAssessment { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> CertaintyAssessment { + let mut quay = CertaintyAssessment::None; + quay.parse(tokenizer, level); + quay + } + + pub fn get_int(&self) -> Option { + match &self { + CertaintyAssessment::Unreliable => Some(0), + CertaintyAssessment::Questionable => Some(1), + CertaintyAssessment::Secondary => Some(2), + CertaintyAssessment::Direct => Some(3), + CertaintyAssessment::None => None, + } + } +} + +impl Parser for CertaintyAssessment { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + tokenizer.next_token(); + if let Token::LineValue(val) = &tokenizer.current_token { + *self = match val.as_str() { + "0" => CertaintyAssessment::Unreliable, + "1" => CertaintyAssessment::Questionable, + "2" => CertaintyAssessment::Secondary, + "3" => CertaintyAssessment::Direct, + _ => panic!( + "{} Unknown CertaintyAssessment value {} ({})", + tokenizer.debug(), + val, + level + ), + }; + } else { + panic!( + "Expected 
CertaintyAssessment LineValue, found {:?}", + tokenizer.current_token + ); + } + tokenizer.next_token(); + } +} diff --git a/src/types/translation.rs b/src/types/translation.rs index 15f1d87..c7b65ac 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -10,7 +10,7 @@ use crate::{ /// is found in NOTE and SNOTE payloads. Each NOTE-TRAN must have either a LANG substructure or a /// MIME substructure or both. If either is missing, it is assumed to have the same value as the /// superstructure. See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#NOTE-TRAN -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Translation { pub value: Option, diff --git a/tests/header.rs b/tests/header.rs deleted file mode 100644 index 3664ac0..0000000 --- a/tests/header.rs +++ /dev/null @@ -1,287 +0,0 @@ -#[cfg(test)] -mod tests { - use gedcom::GedcomRecord; - - #[test] - fn parse_head_gedc() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 2 FORM LINEAGE-LINKED\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let head_gedc = data.header.unwrap().gedcom.unwrap(); - assert_eq!(head_gedc.version.unwrap(), "5.5"); - assert_eq!(head_gedc.form.unwrap(), "LINEAGE-LINKED"); - } - - #[test] - fn parse_head_sour() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 SOUR SOURCE_NAME\n\ - 2 VERS Version number of source-program\n\ - 2 NAME Name of source-program\n\ - 2 CORP Corporation name\n\ - 3 ADDR 2 Harrison Street\n\ - 4 CONT 7th Floor\n\ - 4 CONT Suite 175\n\ - 4 ADR1 2 Harrison Street\n\ - 4 ADR2 7th Floor\n\ - 4 ADR3 Suite 175\n\ - 4 CITY San Francisco\n\ - 4 STAE California\n\ - 4 POST 94105\n\ - 4 CTRY USA\n\ - 3 PHON Corporation phone number\n\ - 2 DATA Name of source data\n\ - 3 DATE 1 JAN 1998\n\ - 3 COPR Copyright of source data\n\ - 0 TRLR"; - - let mut parser = 
GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let sour = data.header.unwrap().source.unwrap(); - assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); - - let vers = sour.version.unwrap(); - assert_eq!(vers, "Version number of source-program"); - - let name = sour.name.unwrap(); - assert_eq!(name, "Name of source-program"); - - let corp = sour.corporation.unwrap(); - assert_eq!(corp.value.unwrap(), "Corporation name"); - - let corp_addr = corp.address.unwrap(); - assert_eq!( - corp_addr.value.unwrap(), - "2 Harrison Street\n7th Floor\nSuite 175" - ); - assert_eq!(corp_addr.adr1.unwrap(), "2 Harrison Street"); - assert_eq!(corp_addr.adr2.unwrap(), "7th Floor"); - assert_eq!(corp_addr.adr3.unwrap(), "Suite 175"); - assert_eq!(corp_addr.city.unwrap(), "San Francisco"); - assert_eq!(corp_addr.state.unwrap(), "California"); - assert_eq!(corp_addr.post.unwrap(), "94105"); - assert_eq!(corp_addr.country.unwrap(), "USA"); - - let corp_phon = corp.phone.unwrap(); - assert_eq!(corp_phon, "Corporation phone number"); - - let sour_data = sour.data.unwrap(); - assert_eq!(sour_data.value.unwrap(), "Name of source data"); - assert_eq!(sour_data.date.unwrap().value.unwrap(), "1 JAN 1998"); - assert_eq!( - sour_data.copyright.unwrap().value.unwrap(), - "Copyright of source data" - ); - } - - #[test] - fn parse_head_dest() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 DEST Destination of transmission\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - assert_eq!( - data.header.unwrap().destination.unwrap(), - "Destination of transmission" - ); - } - - #[test] - fn parse_head_date() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 DATE 1 JAN 1998\n\ - 2 TIME 13:57:24.80\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_date = data.header.unwrap().date.unwrap(); - assert_eq!(h_date.value.unwrap(), "1 JAN 
1998"); - assert_eq!(h_date.time.unwrap(), "13:57:24.80"); - } - - #[test] - fn parse_head_subm() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 SUBM @SUBMITTER@\n\ - 1 SUBN @SUBMISSION@\n\ - 1 FILE ALLGED.GED\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_subm = data.header.unwrap().submitter_tag.unwrap(); - assert_eq!(h_subm.as_str(), "@SUBMITTER@"); - } - - #[test] - fn parse_head_subn() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 SUBM @SUBMITTER@\n\ - 1 SUBN @SUBMISSION@\n\ - 1 FILE ALLGED.GED\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_subn = data.header.unwrap().submission_tag.unwrap(); - assert_eq!(h_subn.as_str(), "@SUBMISSION@"); - } - - #[test] - fn parse_head_file() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 SUBM @SUBMITTER@\n\ - 1 SUBN @SUBMISSION@\n\ - 1 FILE ALLGED.GED\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_file = data.header.unwrap().filename.unwrap(); - assert_eq!(h_file.as_str(), "ALLGED.GED"); - } - - #[test] - fn parse_head_copr() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 COPR (C) 1997-2000 by H. Eichmann.\n\ - 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_copr = data.header.unwrap().copyright.unwrap(); - assert_eq!(h_copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); - assert_eq!( - h_copr.continued.unwrap(), - "You can use and distribute this file freely as long as you do not charge for it." 
- ); - } - - #[test] - fn parse_head_char() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 CHAR ASCII\n\ - 2 VERS Version number of ASCII (whatever it means)\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_char = data.header.unwrap().encoding.unwrap(); - assert_eq!(h_char.value.unwrap(), "ASCII"); - assert_eq!( - h_char.version.unwrap(), - "Version number of ASCII (whatever it means)" - ); - } - - #[test] - fn parse_head_lang() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 LANG language - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_lang = data.header.unwrap().language.unwrap(); - assert_eq!(h_lang.as_str(), "language"); - } - - #[test] - fn parse_head_plac() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 PLAC\n\ - 2 FORM City, County, State, Country\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_plac = data.header.unwrap().place.unwrap(); - assert_eq!(h_plac.form[0], "City"); - assert_eq!(h_plac.form[1], "County"); - assert_eq!(h_plac.form[2], "State"); - assert_eq!(h_plac.form[3], "Country"); - } - - #[test] - fn parse_head_note() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 1 NOTE A general note about this file:\n\ - 2 CONT It demonstrates most of the data which can be submitted using GEDCOM5.5. It shows the relatives of PERSON1:\n\ - 2 CONT His 2 wifes (PERSON2, PERSON8), his parents (father: PERSON5, mother not given),\n\ - 2 CONT adoptive parents (mother: PERSON6, father not given) and his 3 children (PERSON3, PERSON4 and PERSON7).\n\ - 2 CONT In PERSON1, FAMILY1, SUBMITTER, SUBMISSION and SOURCE1 as many datafields as possible are used.\n\ - 2 CONT All other individuals/families contain no data. 
Note, that many data tags can appear more than once\n\ - 2 CONT (in this transmission this is demonstrated with tags: NAME, OCCU, PLACE and NOTE. Seek the word 'another'.\n\ - 2 CONT The data transmitted here do not make sence. Just the HEAD.DATE tag contains the date of the creation\n\ - 2 CONT of this file and will change in future Versions!\n\ - 2 CONT This file is created by H. Eichmann: h.eichmann@@gmx.de. Feel free to copy and use it for any\n\ - 2 CONT non-commercial purpose. For the creation the GEDCOM standard Release 5.5 (2 JAN 1996) has been used.\n\ - 2 CONT Copyright: gedcom@@gedcom.org\n\ - 2 CONT Download it (the GEDCOM 5.5 specs) from: ftp.gedcom.com/pub/genealogy/gedcom.\n\ - 2 CONT Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit).\n\ - 2 CONT This @@ (commercial at) character may only appear ONCE!\n\ - 2 CONT Note continued here. 
The word TE\n\ - 2 CONC ST should not be broken!\n\ - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - - let h_note = data.header.unwrap().note.unwrap(); - assert_eq!(h_note.value.unwrap().chars().count(), 1440); - } -} diff --git a/tests/multimedia.rs b/tests/multimedia.rs deleted file mode 100644 index abe9eea..0000000 --- a/tests/multimedia.rs +++ /dev/null @@ -1,117 +0,0 @@ -#[cfg(test)] -mod tests { - use gedcom::GedcomRecord; - - #[test] - fn parses_basic_multimedia_record() { - let sample = "\ - 0 HEAD\n\ - 1 CHAR UTF-8\n\ - 1 SOUR Ancestry.com Family Trees\n\ - 2 VERS (2010.3)\n\ - 2 NAME Ancestry.com Family Trees\n\ - 2 CORP Ancestry.com\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 2 FORM LINEAGE-LINKED\n\ - 0 OBJE\n\ - 1 FILE http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1\n\ - 1 FORM jpg\n\ - 1 TITL In Prague\n\ - 0 TRLR"; - - let mut record = GedcomRecord::new(sample.chars()); - let data = record.parse_record(); - assert_eq!(data.multimedia.len(), 1); - - let obje = &data.multimedia[0]; - assert_eq!(obje.title.as_ref().unwrap(), "In Prague"); - - let form = obje.form.as_ref().unwrap(); - assert_eq!(form.value.as_ref().unwrap(), "jpg"); - - let file = obje.file.as_ref().unwrap(); - assert_eq!( - file.value.as_ref().unwrap(), - "http://trees.ancestry.com/rd?f=image&guid=Xxxxxxxx-Xxxx-Xxxx-Xxxx-Xxxxxxxxxxxx&tid=Xxxxxxxx&pid=1" - ); - } - - #[test] - fn parses_spec_structure() { - let sample = "\ - 0 HEAD\n\ - 1 GEDC\n\ - 2 VERS 5.5\n\ - 2 FORM LINEAGE-LINKED\n\ - 0 @MEDIA1@ OBJE\n\ - 1 FILE /home/user/media/file_name.bmp\n\ - 2 FORM bmp\n\ - 3 TYPE photo - 2 TITL A Bitmap\n\ - 1 REFN 000\n\ - 2 TYPE User Reference Type\n\ - 1 RIN Automated Id\n\ - 1 NOTE A note\n\ - 2 CONT Note continued here. 
The word TE\n\ - 2 CONC ST should not be broken!\n\ - 1 SOUR @SOUR1@\n\ - 2 PAGE 42 - 2 _CUSTOM Custom data\n\ - 1 CHAN - 2 DATE 1 APR 1998 - 3 TIME 12:34:56.789 - 2 NOTE A note - 3 CONT Note continued here. The word TE - 3 CONC ST should not be broken! - 0 TRLR"; - - let mut parser = GedcomRecord::new(sample.chars()); - let data = parser.parse_record(); - assert_eq!(data.multimedia.len(), 1); - - let obje = &data.multimedia[0]; - assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); - - let file = obje.file.as_ref().unwrap(); - assert_eq!( - file.value.as_ref().unwrap(), - "/home/user/media/file_name.bmp" - ); - - assert_eq!(file.title.as_ref().unwrap(), "A Bitmap"); - - let form = file.form.as_ref().unwrap(); - assert_eq!(form.value.as_ref().unwrap(), "bmp"); - assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); - - let user_ref = obje.user_reference_number.as_ref().unwrap(); - assert_eq!(user_ref.value.as_ref().unwrap(), "000"); - assert_eq!( - user_ref.user_reference_type.as_ref().unwrap(), - "User Reference Type" - ); - - assert_eq!(obje.automated_record_id.as_ref().unwrap(), "Automated Id"); - - let note = obje.note_structure.as_ref().unwrap(); - assert_eq!( - note.value.as_ref().unwrap(), - "A note\nNote continued here. The word TEST should not be broken!" - ); - - let sour = obje.source_citation.as_ref().unwrap(); - assert_eq!(sour.xref, "@SOUR1@"); - assert_eq!(sour.page.as_ref().unwrap(), "42"); - assert_eq!(sour.custom_data.len(), 1); - assert_eq!(sour.custom_data[0].value, "Custom data"); - - let chan = obje.change_date.as_ref().unwrap(); - let date = chan.date.as_ref().unwrap(); - assert_eq!(date.value.as_ref().unwrap(), "1 APR 1998"); - assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); - - let chan_note = chan.note.as_ref().unwrap(); - assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. 
The word TEST should not be broken!"); - } -} From 3a03f8b4c90af319ca947c4d6cf10ad5e2ffda77 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 14:55:32 -0600 Subject: [PATCH 34/55] Refactor of several structs and tests --- README.md | 22 +++-- readme.md | 74 ---------------- src/bin.rs | 8 +- src/lib.rs | 103 +++++++++++++++++----- src/parser.rs | 7 -- src/types/address.rs | 2 +- src/types/copyright.rs | 27 +++++- src/types/corporation.rs | 2 +- src/types/date.rs | 37 +++++++- src/types/event.rs | 2 +- src/types/family.rs | 8 +- src/types/header.rs | 88 ++++++++----------- src/types/individual.rs | 15 +++- src/types/multimedia.rs | 181 +++++++++++++++++++-------------------- src/types/note.rs | 8 +- src/types/repository.rs | 2 +- src/types/source.rs | 32 ++++--- src/types/submitter.rs | 2 +- src/types/translation.rs | 2 +- tests/json_feature.rs | 2 +- tests/lib.rs | 10 +-- 21 files changed, 327 insertions(+), 307 deletions(-) delete mode 100644 readme.md delete mode 100644 src/parser.rs diff --git a/README.md b/README.md index c82a41d..406a871 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # rust-gedcom - - - - - - + + + + + + > A gedcom parser written in rust 🦀 @@ -13,11 +13,9 @@ GEDCOM is a file format for sharing genealogical information like family trees. -I wanted experience playing with parsers and representing tree structures in Rust, and noticed a parser for Rust did not exist. And thus, this project was born! A fun experiment to practice my Rust abilities. +`rust-gedcom` hopes to be ~~fully~~ mostly compliant with the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). -It hopes to be ~~fully~~ mostly compliant with the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). 
- -I have found this [5.5.2 specification](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) useful in its assessment of which tags are worth supporting or not. +Later specifications, such as [5.5.2](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) and [7.0.11](https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#purpose-and-content-of-the-familysearch-gedcom-specification) are useful in assessing which tags are worth supporting or not. ## Usage @@ -54,7 +52,7 @@ gedcom = { version = "", features = ["json"] } ## 🚧 Progress 🚧 -There are still parts of the specification not yet implemented and the project is subject to change. The way I have been developing is to take a gedcom file, attempt to parse it and act on whatever errors or omissions occur. In it's current state, it is capable of parsing the [sample.ged](tests/fixtures/sample.ged) in its entirety. +There are still parts of the specification not yet implemented, and the project is subject to change. The way development has been happening is by taking a GEDCOM file, attempting to parse it and acting on whatever errors or omissions occur. In its current state, it is capable of parsing the [sample.ged](tests/fixtures/sample.ged) in its entirety. Here are some notes about parsed data & tags. Page references are to the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). @@ -66,4 +64,4 @@ Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources ( ## License -© 2021, [Robert Pirtle](https://robert.pirtle.xyz/). licensed under [MIT](license.md). +Licensed under [MIT](license.md). 
diff --git a/readme.md b/readme.md deleted file mode 100644 index da06c1e..0000000 --- a/readme.md +++ /dev/null @@ -1,74 +0,0 @@ -# rust-gedcom - - - - - - - - -> a gedcom parser written in rust 🦀 - -## About this project - -GEDCOM is a file format for sharing genealogical information like family trees, and it's widely used in many genealogy programs. - -I wanted experience playing with parsers and representing tree structures in Rust, and noticed a parser for Rust did not exist. And thus, this project was born! A fun experiment to practice my Rust abilities. - -It hopes to be ~~fully~~ mostly compliant with the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). - -I have found this [5.5.2 specification](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) useful in its assessment of which tags are worth supporting or not. - -## Usage - -This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for my testing & development. It prints the `GedcomData` object and some stats about the gedcom file passed into it: -```bash -parse_gedcom ./tests/fixtures/sample.ged - -# outputs tree data here w/ stats -# ---------------------- -# | Gedcom Data Stats: | -# ---------------------- -# submitters: 1 -# individuals: 3 -# families: 2 -# repositories: 1 -# sources: 1 -# multimedia: 0 -# ---------------------- -``` - -The second is a library containing the parser. - -## JSON Serializing/Deserializing with `serde` -This crate has an optional feature called `json` that implements `Serialize` & `Deserialize` for the gedcom data structure. This allows you to easily integrate with the web. - -For more info about serde, [check them out](https://serde.rs/)! - -The feature is not enabled by default. There are zero dependencies if just using the gedcom parsing functionality. 
- -Use the json feature with any version >=0.2.1 by adding the following to your Cargo.toml: -```toml -gedcom = { version = "", features = ["json"] } -``` - -## 🚧 Progress 🚧 - -There are still parts of the specification not yet implemented and the project is subject to change. The way I have been developing is to take a gedcom file, attempt to parse it and act on whatever errors or omissions occur. In it's current state, it is capable of parsing the [sample.ged](tests/fixtures/sample.ged) in its entirety. - -Here are some notes about parsed data & tags. Page references are to the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). - -### Top-level tags - -* `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. -* `NOTE_RECORD` - p.27 - Notes (`NOTE`) are also unhandled. (except in header) - -Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. - -## Notes to self - -* Consider creating some Traits to handle change dates, notes, source citations, and other recurring fields. - -## License - -© 2021, [Robert Pirtle](https://robert.pirtle.xyz/). licensed under [MIT](license.md). 
diff --git a/src/bin.rs b/src/bin.rs index 737deaf..c7d8a75 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,10 +1,10 @@ -// use ged::{GedcomRecord, GedcomData}; +// use ged::{GedcomDocument, GedcomData}; use std::env; use std::fs; use std::path::PathBuf; -use gedcom::{GedcomData, GedcomRecord}; +use gedcom::{GedcomData, GedcomDocument}; fn main() { let args: Vec = env::args().collect(); @@ -23,8 +23,8 @@ fn main() { let data: GedcomData; if let Ok(contents) = read_relative(filename) { - let mut parser = GedcomRecord::new(contents.chars()); - data = parser.parse_record(); + let mut doc = GedcomDocument::new(contents.chars()); + data = doc.parse_document(); println!("Parsing complete!"); // println!("\n\n{:#?}", data); diff --git a/src/lib.rs b/src/lib.rs index f386e3c..bdb778e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,11 +2,11 @@ ```rust // the parser takes the gedcom file contents as a chars iterator -use gedcom::GedcomRecord; +use gedcom::GedcomDocument; let gedcom_source = std::fs::read_to_string("./tests/fixtures/sample.ged").unwrap(); -let mut record = GedcomRecord::new(gedcom_source.chars()); -let gedcom_data = record.parse_record(); +let mut doc = GedcomDocument::new(gedcom_source.chars()); +let gedcom_data = doc.parse_document(); // output some stats on the gedcom contents gedcom_data.stats(); @@ -18,6 +18,11 @@ This crate contains an optional `"json"` feature that implements serialization & #![deny(clippy::pedantic)] #![warn(missing_docs)] +use std::str::Chars; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + #[macro_use] mod util; @@ -29,37 +34,96 @@ use types::{ UserDefinedData, Family, Header, Individual, MultimediaRecord, Repository, Source, Submitter, }; -mod parser; -pub use parser::Parser; -use std::str::Chars; - -/// The Gedcom parser that converts the token list into a data structure -pub struct GedcomRecord<'a> { +/// The GedcomDocument can convert the token list into a data structure. 
The order of the Dataset +/// should be as follows: the HEAD must come first and TRLR must be last, with any RECORDs in +/// between. +/// +/// # A Minimal Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD +/// 1 GEDC +/// 2 VERS 5.5 +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let head = data.header.unwrap(); +/// let gedc = head.gedcom.unwrap(); +/// assert_eq!(gedc.version.unwrap(), "5.5"); +/// ``` +pub struct GedcomDocument<'a> { tokenizer: Tokenizer<'a>, } -impl<'a> GedcomRecord<'a> { +impl<'a> GedcomDocument<'a> { /// Creates a parser state machine for parsing a gedcom file as a chars iterator #[must_use] - pub fn new(chars: Chars<'a>) -> GedcomRecord { + pub fn new(chars: Chars<'a>) -> GedcomDocument { let mut tokenizer = Tokenizer::new(chars); tokenizer.next_token(); - GedcomRecord { tokenizer } + GedcomDocument { tokenizer } } /// Does the actual parsing of the record. - pub fn parse_record(&mut self) -> GedcomData { + pub fn parse_document(&mut self) -> GedcomData { GedcomData::new(&mut self.tokenizer, 0) } } -#[cfg(feature = "json")] -use serde::{Deserialize, Serialize}; +/// The Parser trait converts a subset of a token list into a type's data structure. +pub trait Parser { + /// parse does the actual parsing of a subset of a token list + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8); +} +/// GedcomData is the data structure representing all the data within a gedcom file +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD +/// 1 GEDC +/// 2 VERS 5.5 +/// 0 @SUBMITTER@ SUBM +/// 0 @PERSON1@ INDI +/// 0 @FAMILY1@ FAM +/// 0 @R1@ REPO +/// 0 @SOURCE1@ SOUR +/// 0 @MEDIA1@ OBJE\n\ +/// 0 _MYOWNTAG This is a non-standard tag. 
Not recommended but allowed +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// assert_eq!(data.submitters.len(), 1); +/// assert_eq!(data.submitters[0].xref.as_ref().unwrap(), "@SUBMITTER@"); +/// +/// assert_eq!(data.individuals.len(), 1); +/// assert_eq!(data.individuals[0].xref.as_ref().unwrap(), "@PERSON1@"); +/// +/// assert_eq!(data.families.len(), 1); +/// assert_eq!(data.families[0].xref.as_ref().unwrap(), "@FAMILY1@"); +/// +/// assert_eq!(data.repositories.len(), 1); +/// assert_eq!(data.repositories[0].xref.as_ref().unwrap(), "@R1@"); +/// +/// assert_eq!(data.sources.len(), 1); +/// assert_eq!(data.sources[0].xref.as_ref().unwrap(), "@SOURCE1@"); +/// +/// assert_eq!(data.custom_data.len(), 1); +/// assert_eq!(data.custom_data[0].tag, "_MYOWNTAG"); +/// assert_eq!(data.custom_data[0].value, "This is a non-standard tag. Not recommended but allowed"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// The data structure representing all the data within a gedcom file pub struct GedcomData { /// Header containing file metadata pub header: Option
, @@ -146,7 +210,6 @@ impl Parser for GedcomData { /// Does the actual parsing of the record. fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { loop { - // TODO is this necessary? let current_level = match tokenizer.current_token { Token::Level(n) => n, _ => panic!( @@ -203,7 +266,7 @@ impl Parser for GedcomData { #[must_use] /// Helper function for converting GEDCOM file content stream to parsed data. -pub fn parse(content: std::str::Chars) -> GedcomData { - let mut p = GedcomRecord::new(content); - p.parse_record() +pub fn parse_ged(content: std::str::Chars) -> GedcomData { + let mut p = GedcomDocument::new(content); + p.parse_document() } diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index ad73e84..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,7 +0,0 @@ -use crate::tokenizer::Tokenizer; - -/// Parse converts a subset of a token list into a type's data structure. -pub trait Parser { - /// parse does the actual parsing of a subset of a token list - fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8); -} diff --git a/src/types/address.rs b/src/types/address.rs index 11f37a4..23e0e58 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use std::fmt; use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, }; diff --git a/src/types/copyright.rs b/src/types/copyright.rs index ebc87a3..5f6b129 100644 --- a/src/types/copyright.rs +++ b/src/types/copyright.rs @@ -2,12 +2,37 @@ use serde::{Deserialize, Serialize}; use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, }; /// A copyright statement, as appropriate for the copyright laws applicable to this data. /// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#COPR +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomDocument; +/// +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 COPR (C) 1997-2000 by H. 
Eichmann.\n\ +/// 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// let header = data.header.unwrap(); +/// let copr = header.copyright.unwrap(); +/// +/// assert_eq!(copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); +/// assert_eq!( +/// copr.continued.unwrap(), +/// "You can use and distribute this file freely as long as you do not charge for it." +/// ); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Copyright { diff --git a/src/types/corporation.rs b/src/types/corporation.rs index 453b161..28d67eb 100644 --- a/src/types/corporation.rs +++ b/src/types/corporation.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::Address, }; diff --git a/src/types/date.rs b/src/types/date.rs index 209cf15..ef2c0e5 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -1,12 +1,12 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, + types::Note, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::Note; /// TODO Date should encompasses a number of date formats, e.g. approximated, period, phrase and range. #[derive(Clone, Debug, Default)] @@ -65,6 +65,39 @@ impl Parser for Date { /// ChangeDate is intended to only record the last change to a record. Some systems may want to /// manage the change process with more detail, but it is sufficient for GEDCOM purposes to /// indicate the last time that a record was modified. +/// +/// # Example +/// ``` +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @MEDIA1@ OBJE\n\ +/// 1 FILE /home/user/media/file_name.bmp\n\ +/// 1 CHAN +/// 2 DATE 1 APR 1998 +/// 3 TIME 12:34:56.789 +/// 2 NOTE A note +/// 3 CONT Note continued here. 
The word TE +/// 3 CONC ST should not be broken! +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let obje = &data.multimedia[0]; +/// +/// let chan = obje.change_date.as_ref().unwrap(); +/// let date = chan.date.as_ref().unwrap(); +/// assert_eq!(date.value.as_ref().unwrap(), "1 APR 1998"); +/// assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); +/// +/// let chan_note = chan.note.as_ref().unwrap(); +/// assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. The word TEST should not be broken!"); +/// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct ChangeDate { diff --git a/src/types/event.rs b/src/types/event.rs index 91b69e5..1338c92 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{ChildToFamilyLink, Note, SourceCitation}, }; diff --git a/src/types/family.rs b/src/types/family.rs index ee5611f..2e48bd8 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{event::HasEvents, Event, Note}, }; @@ -102,7 +102,7 @@ impl HasEvents for Family { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -116,8 +116,8 @@ impl HasEvents for Family { /// 2 FAMC @PARENTS@ /// 0 TRLR"; /// -/// let mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// /// assert_eq!(data.individuals[0].events[0].child_to_family_link.as_ref().unwrap().xref.as_ref().unwrap(), "@PARENTS@"); /// diff --git a/src/types/header.rs b/src/types/header.rs index 764171d..d6f8d61 100644 --- 
a/src/types/header.rs +++ b/src/types/header.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{Copyright, Corporation, Date, Note}, }; @@ -13,59 +13,43 @@ use super::UserDefinedData; /// /// # Example /// -/// ``` -/// use gedcom::GedcomRecord; +/// ```rust +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ /// 2 VERS 5.5\n\ /// 1 DEST Destination of transmission\n\ -/// 1 DATE 1 JAN 1998\n\ -/// 2 TIME 13:57:24.80\n\ /// 1 SUBM @SUBMITTER@\n\ /// 1 SUBN @SUBMISSION@\n\ /// 1 FILE ALLGED.GED\n\ -/// 1 COPR (C) 1997-2000 by H. Eichmann.\n\ -/// 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ -/// 1 LANG language +/// 1 LANG language\n\ /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); -/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// let header = data.header.unwrap(); -/// assert_eq!(header.gedcom.unwrap().version.unwrap(), "5.5"); /// -/// assert_eq!( -/// header.destination.unwrap(), -/// "Destination of transmission" -/// ); +/// let dest = header.destination.unwrap(); +/// assert_eq!(dest, "Destination of transmission"); /// -/// let date = header.date.unwrap(); -/// assert_eq!(date.value.unwrap(), "1 JAN 1998"); -/// assert_eq!(date.time.unwrap(), "13:57:24.80"); +/// let subn = header.submitter_tag.unwrap(); +/// assert_eq!(subn, "@SUBMITTER@"); /// /// let subm = header.submission_tag.unwrap(); /// assert_eq!(subm, "@SUBMISSION@"); /// -/// let file = header.filename.unwrap(); -/// assert_eq!(file, "ALLGED.GED"); -/// -/// let copr = header.copyright.unwrap(); -/// assert_eq!(copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); -/// assert_eq!( -/// copr.continued.unwrap(), -/// "You can use and distribute this file freely as long as you do not charge for it." 
-/// ); -/// /// let lang = header.language.unwrap(); /// assert_eq!(lang.as_str(), "language"); +/// +/// let file = header.filename.unwrap(); +/// assert_eq!(file, "ALLGED.GED"); /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Header { /// tag: GEDC - pub gedcom: Option, + pub gedcom: Option, /// tag: CHAR pub encoding: Option, /// tag: SOUR @@ -121,7 +105,7 @@ impl Parser for Header { while tokenizer.current_token != Token::Level(level) { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "GEDC" => self.gedcom = Some(GedcomDoc::new(tokenizer, 1)), + "GEDC" => self.gedcom = Some(GedcomMeta::new(tokenizer, 1)), "SOUR" => self.source = Some(HeadSour::new(tokenizer, 1)), "DEST" => self.destination = Some(tokenizer.take_line_value()), "DATE" => self.date = Some(Date::new(tokenizer, 1)), @@ -146,14 +130,14 @@ impl Parser for Header { } } -/// GedcomDoc (tag: GEDC) is a container for information about the entire document. It is +/// GedcomMeta (tag: GEDC) is a container for information about the entire document. It is /// recommended that applications write GEDC with its required subrecord VERS as the first /// substructure of a HEAD. 
See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#GEDC /// /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -161,8 +145,8 @@ impl Parser for Header { /// 2 FORM LINEAGE-LINKED\n\ /// 0 TRLR"; /// -/// let mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// /// let head_gedc = data.header.unwrap().gedcom.unwrap(); /// assert_eq!(head_gedc.version.unwrap(), "5.5"); @@ -170,23 +154,23 @@ impl Parser for Header { /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct GedcomDoc { +pub struct GedcomMeta { /// tag: VERS pub version: Option, /// tag: FORM; see Gedcom 5.5.1 specification, p. 50 pub form: Option, } -impl GedcomDoc { +impl GedcomMeta { #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomDoc { - let mut gedc = GedcomDoc::default(); + pub fn new(tokenizer: &mut Tokenizer, level: u8) -> GedcomMeta { + let mut gedc = GedcomMeta::default(); gedc.parse(tokenizer, level); gedc } } -impl Parser for GedcomDoc { +impl Parser for GedcomMeta { /// parse handles parsing GEDC tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip GEDC tag @@ -231,7 +215,7 @@ impl Parser for GedcomDoc { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -240,8 +224,8 @@ impl Parser for GedcomDoc { /// 2 VERS Version number of ASCII (whatever it means)\n\ /// 0 TRLR"; -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// let h_char = data.header.unwrap().encoding.unwrap(); /// assert_eq!(h_char.value.unwrap(), "ASCII"); @@ -302,7 +286,7 @@ impl 
Parser for Encoding { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -312,8 +296,8 @@ impl Parser for Encoding { /// 2 NAME Name of source-program\n\ /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// /// let sour = data.header.unwrap().source.unwrap(); /// assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); @@ -380,7 +364,7 @@ impl Parser for HeadSour { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -391,8 +375,8 @@ impl Parser for HeadSour { /// 3 COPR Copyright of source data\n\ /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// /// let sour = data.header.unwrap().source.unwrap(); /// assert_eq!(sour.value.unwrap(), "SOURCE_NAME"); @@ -461,7 +445,7 @@ impl Parser for HeadSourData { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -470,8 +454,8 @@ impl Parser for HeadSourData { /// 2 FORM City, County, State, Country\n\ /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// /// let h_plac = data.header.unwrap().place.unwrap(); /// assert_eq!(h_plac.form[0], "City"); diff --git a/src/types/individual.rs b/src/types/individual.rs index 279334d..1d8dc06 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,14 +1,12 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, - 
types::{event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData}, + types::{event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -type Xref = String; - /// A Person within the family tree #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -141,6 +139,15 @@ impl Gender { gender.parse(tokenizer, level); gender } + + pub fn get_gender(&self) -> &str { + match &self { + Gender::Male => "M", + Gender::Female => "F", + Gender::Nonbinary => "N", + Gender::Unknown => "U", + } + } } impl Parser for Gender { diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 255f091..fbb93f6 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{Note, SourceCitation, Xref}, }; @@ -9,7 +9,7 @@ use super::ChangeDate; #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// The multimedia record refers to 1 or more external digital files, and may provide some +/// MultimediaRecord refers to 1 or more external digital files, and may provide some /// additional information about the files and the media they encode. /// /// The file reference can occur more than once to group multiple files together. Grouped files @@ -22,7 +22,7 @@ use super::ChangeDate; /// /// # Example /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -30,73 +30,24 @@ use super::ChangeDate; /// 2 FORM LINEAGE-LINKED\n\ /// 0 @MEDIA1@ OBJE\n\ /// 1 FILE /home/user/media/file_name.bmp\n\ -/// 2 FORM bmp\n\ -/// 3 TYPE photo -/// 2 TITL A Bitmap\n\ -/// 1 REFN 000\n\ -/// 2 TYPE User Reference Type\n\ +/// 1 TITL A Title\n\ /// 1 RIN Automated Id\n\ -/// 1 NOTE A note\n\ -/// 2 CONT Note continued here. 
The word TE\n\ -/// 2 CONC ST should not be broken!\n\ -/// 1 SOUR @SOUR1@\n\ -/// 2 PAGE 42 -/// 2 _CUSTOM Custom data\n\ -/// 1 CHAN -/// 2 DATE 1 APR 1998 -/// 3 TIME 12:34:56.789 -/// 2 NOTE A note -/// 3 CONT Note continued here. The word TE -/// 3 CONC ST should not be broken! /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); -/// assert_eq!(data.multimedia.len(), 1); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// +/// assert_eq!(data.multimedia.len(), 1); /// let obje = &data.multimedia[0]; -/// assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); /// -/// let file = obje.file.as_ref().unwrap(); -/// assert_eq!( -/// file.value.as_ref().unwrap(), -/// "/home/user/media/file_name.bmp" -/// ); +/// let xref = obje.xref.as_ref().unwrap(); +/// assert_eq!(xref, "@MEDIA1@"); /// -/// assert_eq!(file.title.as_ref().unwrap(), "A Bitmap"); -/// -/// let form = file.form.as_ref().unwrap(); -/// assert_eq!(form.value.as_ref().unwrap(), "bmp"); -/// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); -/// -/// let user_ref = obje.user_reference_number.as_ref().unwrap(); -/// assert_eq!(user_ref.value.as_ref().unwrap(), "000"); -/// assert_eq!( -/// user_ref.user_reference_type.as_ref().unwrap(), -/// "User Reference Type" -/// ); +/// let titl = obje.title.as_ref().unwrap(); +/// assert_eq!(titl, "A Title"); /// -/// assert_eq!(obje.automated_record_id.as_ref().unwrap(), "Automated Id"); -/// -/// let note = obje.note_structure.as_ref().unwrap(); -/// assert_eq!( -/// note.value.as_ref().unwrap(), -/// "A note\nNote continued here. The word TEST should not be broken!" 
-/// ); -/// -/// let sour = obje.source_citation.as_ref().unwrap(); -/// assert_eq!(sour.xref, "@SOUR1@"); -/// assert_eq!(sour.page.as_ref().unwrap(), "42"); -/// assert_eq!(sour.custom_data.len(), 1); -/// assert_eq!(sour.custom_data[0].value, "Custom data"); -/// -/// let chan = obje.change_date.as_ref().unwrap(); -/// let date = chan.date.as_ref().unwrap(); -/// assert_eq!(date.value.as_ref().unwrap(), "1 APR 1998"); -/// assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); -/// -/// let chan_note = chan.note.as_ref().unwrap(); -/// assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. The word TEST should not be broken!"); +/// let rin = obje.automated_record_id.as_ref().unwrap(); +/// assert_eq!(rin, "Automated Id"); /// ``` pub struct MultimediaRecord { /// Optional reference to link to this submitter @@ -172,7 +123,7 @@ impl Parser for MultimediaRecord { /// /// # Example /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 CHAR UTF-8\n\ @@ -189,8 +140,8 @@ impl Parser for MultimediaRecord { /// 1 TITL In Prague\n\ /// 0 TRLR"; /// -/// let mut record = GedcomRecord::new(sample.chars()); -/// let data = record.parse_record(); +/// let mut record = GedcomDocument::new(sample.chars()); +/// let data = record.parse_document(); /// assert_eq!(data.multimedia.len(), 1); /// /// let obje = &data.multimedia[0]; @@ -257,34 +208,30 @@ impl Parser for MultimediaLink { } } -/// A complete local or remote file reference to the auxiliary data to be linked to the GEDCOM -/// context. Remote reference would include a network address where the multimedia data may -/// be obtained. +/// MultimediaFileRefn is a complete local or remote file reference to the auxiliary data to be +/// linked to the GEDCOM context. Remote reference would include a network address where the +/// multimedia data may be obtained. 
+/// /// # Example -/// ``` -/// use gedcom::GedcomRecord; +/// +/// ```rust +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ /// 2 VERS 5.5\n\ -/// 2 FORM LINEAGE-LINKED\n\ /// 0 @MEDIA1@ OBJE\n\ /// 1 FILE /home/user/media/file_name.bmp\n\ /// 2 FORM bmp\n\ /// 3 TYPE photo /// 2 TITL A Bitmap\n\ -/// 1 REFN 000\n\ -/// 2 TYPE User Reference Type\n\ /// 0 TRLR"; /// -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// assert_eq!(data.multimedia.len(), 1); /// -/// let obje = &data.multimedia[0]; -/// assert_eq!(obje.xref.as_ref().unwrap(), "@MEDIA1@"); -/// -/// let file = obje.file.as_ref().unwrap(); +/// let file = data.multimedia[0].file.as_ref().unwrap(); /// assert_eq!( /// file.value.as_ref().unwrap(), /// "/home/user/media/file_name.bmp" @@ -295,13 +242,6 @@ impl Parser for MultimediaLink { /// let form = file.form.as_ref().unwrap(); /// assert_eq!(form.value.as_ref().unwrap(), "bmp"); /// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); -/// -/// let user_ref = obje.user_reference_number.as_ref().unwrap(); -/// assert_eq!(user_ref.value.as_ref().unwrap(), "000"); -/// assert_eq!( -/// user_ref.user_reference_type.as_ref().unwrap(), -/// "User Reference Type" -/// ); /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -349,15 +289,41 @@ impl Parser for MultimediaFileRefn { } } -#[derive(Debug, Default)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// Indicates the format of the multimedia data associated with the specific GEDCOM context. This -/// allows processors to determine whether they can process the data object. Any linked files should -/// contain the data required, in the indicated format, to process the file data. 
+/// MultimediaFormat indicates the format of the multimedia data associated with the specific +/// GEDCOM context. This allows processors to determine whether they can process the data object. +/// Any linked files should contain the data required, in the indicated format, to process the file +/// data. /// /// NOTE: The 5.5 spec lists the following seven formats [ bmp | gif | jpg | ole | pcx | tif | wav ]. /// However, we're leaving this open for emerging formats, Option. +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @MEDIA1@ OBJE\n\ +/// 1 FILE /home/user/media/file_name.bmp\n\ +/// 2 FORM bmp\n\ +/// 3 TYPE photo +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let file = data.multimedia[0].file.as_ref().unwrap(); +/// +/// let form = file.form.as_ref().unwrap(); +/// assert_eq!(form.value.as_ref().unwrap(), "bmp"); +/// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); +/// ``` +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaFormat { pub value: Option, pub source_media_type: Option, @@ -399,9 +365,36 @@ impl Parser for MultimediaFormat { #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// A user-defined number or text that the submitter uses to identify this record. For instance, it -/// may be a record number within the submitter's automated or manual system, or it may be a page -/// and position number on a pedigree chart. +/// UserReferenceNumber is a user-defined number or text that the submitter uses to identify this +/// record. For instance, it may be a record number within the submitter's automated or manual +/// system, or it may be a page and position number on a pedigree chart. 
+/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 2 FORM LINEAGE-LINKED\n\ +/// 0 @MEDIA1@ OBJE\n\ +/// 1 FILE /home/user/media/file_name.bmp\n\ +/// 1 REFN 000\n\ +/// 2 TYPE User Reference Type\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// assert_eq!(data.multimedia.len(), 1); +/// +/// let user_ref = data.multimedia[0].user_reference_number.as_ref().unwrap(); +/// assert_eq!(user_ref.value.as_ref().unwrap(), "000"); +/// assert_eq!( +/// user_ref.user_reference_type.as_ref().unwrap(), +/// "User Reference Type" +/// ); +/// ``` pub struct UserReferenceNumber { /// line value pub value: Option, diff --git a/src/types/note.rs b/src/types/note.rs index 2da582a..4a34669 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{Source, Translation}, }; @@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize}; /// /// # Example /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -42,8 +42,8 @@ use serde::{Deserialize, Serialize}; /// 2 CONC ST should not be broken!\n\ /// 0 TRLR"; -/// let mut parser = GedcomRecord::new(sample.chars()); -/// let data = parser.parse_record(); +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); /// let note = data.header.unwrap().note.unwrap(); /// assert_eq!(note.value.unwrap().chars().count(), 1440); diff --git a/src/types/repository.rs b/src/types/repository.rs index 13b8b87..9b916a0 100644 --- a/src/types/repository.rs +++ b/src/types/repository.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, }; diff --git a/src/types/source.rs b/src/types/source.rs index 7aeeae5..7a0229a 100644 --- a/src/types/source.rs +++ 
b/src/types/source.rs @@ -1,17 +1,15 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, - types::{Date, Event, Note, RepoCitation, UserDefinedData}, + types::{Date, Event, Note, RepoCitation, UserDefinedData, Xref}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::Xref; - +/// Source for genealogy facts #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -/// Source for genealogy facts pub struct Source { pub xref: Option, pub data: SourceData, @@ -95,7 +93,7 @@ impl SourceData { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -106,8 +104,8 @@ impl SourceData { /// 2 PAGE 42\n\ /// 0 TRLR"; /// -/// let mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// /// assert_eq!(data.individuals[0].source[0].xref, "@SOURCE1@"); /// assert_eq!(data.individuals[0].source[0].page.as_ref().unwrap(), "42"); @@ -188,7 +186,7 @@ impl Parser for SourceCitation { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -201,8 +199,8 @@ impl Parser for SourceCitation { /// 3 DATE BEF 1 JAN 1900\n\ /// 0 TRLR"; /// -/// let mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// let citation_data = data.individuals[0].source[0].data.as_ref().unwrap(); /// /// assert_eq!( @@ -270,7 +268,7 @@ impl Parser for SourceCitationData { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -286,8 +284,8 @@ impl Parser for SourceCitationData { /// 4 CONC ST should not be broken!\n\ /// 0 TRLR"; /// -/// let 
mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// let citation_data = data.individuals[0].source[0].data.as_ref().unwrap(); /// /// assert_eq!( @@ -363,7 +361,7 @@ impl Parser for TextFromSource { /// # Example /// /// ``` -/// use gedcom::GedcomRecord; +/// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ /// 1 GEDC\n\ @@ -375,8 +373,8 @@ impl Parser for TextFromSource { /// 2 QUAY 1 /// 0 TRLR"; /// -/// let mut ged = GedcomRecord::new(sample.chars()); -/// let data = ged.parse_record(); +/// let mut ged = GedcomDocument::new(sample.chars()); +/// let data = ged.parse_document(); /// let quay = data.individuals[0].source[0].certainty_assessment.as_ref().unwrap(); /// /// assert_eq!( diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 8fa7f72..4c09c72 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,5 +1,5 @@ use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note}, }; diff --git a/src/types/translation.rs b/src/types/translation.rs index c7b65ac..78f464b 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use crate::{ - parser::Parser, + Parser, tokenizer::{Token, Tokenizer}, }; diff --git a/tests/json_feature.rs b/tests/json_feature.rs index 9d9fc29..0174367 100644 --- a/tests/json_feature.rs +++ b/tests/json_feature.rs @@ -2,7 +2,7 @@ #[cfg(feature = "json")] mod json_feature_tests { use super::lib::util::read_relative; - use gedcom::{parse, types::Name}; + use gedcom::{parse_ged, types::Name}; use serde_json; use serde_test::{assert_tokens, Token}; diff --git a/tests/lib.rs b/tests/lib.rs index 056558d..a99b707 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -11,7 +11,7 @@ pub mod util { #[cfg(test)] mod tests { use 
super::util::read_relative; - use gedcom::GedcomRecord; + use gedcom::GedcomDocument; use gedcom::types::event::HasEvents; #[test] @@ -20,8 +20,8 @@ mod tests { // let simple_ged: String = read_relative("./tests/fixtures/allged.ged"); assert!(simple_ged.len() > 0); - let mut parser = GedcomRecord::new(simple_ged.chars()); - let data = parser.parse_record(); + let mut doc = GedcomDocument::new(simple_ged.chars()); + let data = doc.parse_document(); assert_eq!(data.individuals.len(), 3); assert_eq!(data.families.len(), 1); assert_eq!(data.submitters.len(), 1); @@ -72,8 +72,8 @@ mod tests { let simple_ged: String = read_relative("./tests/fixtures/washington.ged"); assert!(simple_ged.len() > 0); - let mut parser = GedcomRecord::new(simple_ged.chars()); - let data = parser.parse_record(); + let mut doc = GedcomDocument::new(simple_ged.chars()); + let data = doc.parse_document(); assert_eq!(data.individuals.len(), 538); assert_eq!(data.families.len(), 278); // assert_eq!(data.submitters.len(), 0); From 8c6b26be63b928b79b079acab707a4668858df44 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 16:06:26 -0600 Subject: [PATCH 35/55] Add tests for Date --- README.md | 2 +- src/types/date.rs | 31 ++++++++++++++++++++++++++++++- src/types/event.rs | 15 ++++++++------- src/types/individual.rs | 2 +- tests/lib.rs | 4 ++-- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 406a871..ce96236 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ GEDCOM is a file format for sharing genealogical information like family trees. `rust-gedcom` hopes to be ~~fully~~ mostly compliant with the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). 
-Later specifications, such as [5.5.2](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) and [7.0.11](https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#purpose-and-content-of-the-familysearch-gedcom-specification) are useful in assessing which tags are worth supporting or not. +Later specifications, such as [5.5.2](https://jfcardinal.github.io/GEDCOM-5.5.2/gedcom-5.5.2.html) and [7.0.11](https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#purpose-and-content-of-the-familysearch-gedcom-specification), are useful in assessing which tags are worth supporting or not. ## Usage diff --git a/src/types/date.rs b/src/types/date.rs index ef2c0e5..9af0fae 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -8,7 +8,36 @@ use crate::{ use serde::{Deserialize, Serialize}; -/// TODO Date should encompasses a number of date formats, e.g. approximated, period, phrase and range. +/// Date encompasses a number of date formats, e.g. approximated, period, phrase and range. +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 1 DATE 2 Oct 2019 +/// 2 TIME 0:00:00 +/// 0 @I1@ INDI +/// 1 NAME Ancestor +/// 1 BIRT +/// 2 DATE BEF 1828 +/// 1 RESI +/// 2 PLAC 100 Broadway, New York, NY 10005 +/// 2 DATE from 1900 to 1905 +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let head_date = data.header.unwrap().date.unwrap(); +/// assert_eq!(head_date.value.unwrap(), "2 Oct 2019"); +/// +/// let resi_date = data.individuals[0].events[0].date.as_ref().unwrap(); +/// assert_eq!(resi_date.value.as_ref().unwrap(), "BEF 1828"); +/// ``` #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Date { diff --git a/src/types/event.rs b/src/types/event.rs index 1338c92..096856b 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,8 +1,9 @@ use 
crate::{ - Parser, tokenizer::{Token, Tokenizer}, - types::{ChildToFamilyLink, Note, SourceCitation}, + types::{ChildToFamilyLink, Date, Note, SourceCitation}, + Parser, }; + #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; use std::{fmt, string::ToString}; @@ -35,7 +36,7 @@ impl ToString for EventType { #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Event { pub event: EventType, - pub date: Option, + pub date: Option, pub place: Option, pub note: Option, pub child_to_family_link: Option, @@ -102,8 +103,8 @@ impl std::fmt::Debug for Event { pub trait HasEvents { fn add_event(&mut self, event: Event) -> (); fn events(&self) -> Vec; - fn dates(&self) -> Vec { - let mut dates: Vec = Vec::new(); + fn dates(&self) -> Vec { + let mut dates: Vec = Vec::new(); for event in self.events() { if let Some(d) = &event.date { dates.push(d.clone()); @@ -135,13 +136,13 @@ impl Parser for Event { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(tokenizer.take_line_value()), + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), "PLAC" => self.place = Some(tokenizer.take_line_value()), "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), "FAMC" => { self.child_to_family_link = Some(ChildToFamilyLink::new(tokenizer, level + 1)) - }, + } "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), }, diff --git a/src/types/individual.rs b/src/types/individual.rs index 1d8dc06..b043c75 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -16,10 +16,10 @@ pub struct Individual { pub sex: Gender, pub families: Vec, pub custom_data: Vec, - pub last_updated: Option, pub source: Vec, pub multimedia: Vec, pub events: Vec, + pub last_updated: Option, } impl Individual { diff --git a/tests/lib.rs b/tests/lib.rs index a99b707..ab21d99 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -64,7 +64,7 @@ 
mod tests { let events = data.families[0].events(); assert_eq!(events.len(), 1); assert_eq!(events[0].event.to_string(), "Marriage"); - assert_eq!(events[0].date.as_ref().unwrap(), "1 APR 1950"); + assert_eq!(events[0].date.as_ref().unwrap().value.as_ref().unwrap(), "1 APR 1950"); } #[test] @@ -104,6 +104,6 @@ mod tests { let events = data.families[0].events(); assert_eq!(events.len(), 1); assert_eq!(events[0].event.to_string(), "Marriage"); - assert_eq!(events[0].date.as_ref().unwrap(), "6 MAR 1730"); + assert_eq!(events[0].date.as_ref().unwrap().value.as_ref().unwrap(), "6 MAR 1730"); } } From c31c9a25bb3b7c116ba8803939d0891c3e154ef1 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 16:10:18 -0600 Subject: [PATCH 36/55] Modify test for Date --- src/types/date.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/types/date.rs b/src/types/date.rs index 9af0fae..ffdd9bb 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -35,8 +35,11 @@ use serde::{Deserialize, Serialize}; /// let head_date = data.header.unwrap().date.unwrap(); /// assert_eq!(head_date.value.unwrap(), "2 Oct 2019"); /// -/// let resi_date = data.individuals[0].events[0].date.as_ref().unwrap(); -/// assert_eq!(resi_date.value.as_ref().unwrap(), "BEF 1828"); +/// let birt_date = data.individuals[0].events[0].date.as_ref().unwrap(); +/// assert_eq!(birt_date.value.as_ref().unwrap(), "BEF 1828"); +/// +/// let resi_date = data.individuals[0].events[1].date.as_ref().unwrap(); +/// assert_eq!(resi_date.value.as_ref().unwrap(), "from 1900 to 1905"); /// ``` #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] From 9674aff3db2c7a38aac13cb406903b26eaa62fd4 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 16:37:34 -0600 Subject: [PATCH 37/55] Handle LineValue for events, e.g. 
RESI --- .gitignore | 2 ++ src/types/event.rs | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.gitignore b/.gitignore index 0592392..c97fb41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target .DS_Store +/tests/tmp.rs +/tests/fixtures/tmp.ged diff --git a/src/types/event.rs b/src/types/event.rs index 096856b..c477938 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -36,6 +36,7 @@ impl ToString for EventType { #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Event { pub event: EventType, + pub value: Option, pub date: Option, pub place: Option, pub note: Option, @@ -48,6 +49,7 @@ impl Event { pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> Event { let mut event = Event { event: Self::from_tag(tag), + value: None, date: None, place: None, note: None, @@ -127,6 +129,14 @@ impl Parser for Event { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); + // handle value on event line + let mut value = String::new(); + + if let Token::LineValue(val) = &tokenizer.current_token { + value.push_str(&val); + tokenizer.next_token(); + } + loop { if let Token::Level(cur_level) = tokenizer.current_token { if cur_level <= level { @@ -150,5 +160,9 @@ impl Parser for Event { _ => panic!("Unhandled Event Token: {:?}", tokenizer.current_token), } } + + if &value != "" { + self.value = Some(value); + } } } From f97c873b75af333803ed62e49b15577f749a7503 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 23:33:40 -0600 Subject: [PATCH 38/55] Modify Continuation and Gender datasets --- src/tokenizer.rs | 4 +- src/types/copyright.rs | 79 ---------------------- src/types/date.rs | 4 +- src/types/header.rs | 24 +++---- src/types/individual.rs | 143 ++++++++++++++++++++++++++++++++-------- src/types/mod.rs | 3 - src/types/multimedia.rs | 18 ++--- src/types/note.rs | 15 +---- src/types/source.rs | 2 +- 9 files changed, 142 insertions(+), 150 deletions(-) delete mode 100644 
src/types/copyright.rs diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ee772a4..adb123a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -196,13 +196,13 @@ impl<'a> Tokenizer<'a> { value.push_str(&self.take_line_value()) } "CONC" => { - value.push(' '); + // value.push(' '); value.push_str(&self.take_line_value()) } _ => panic!("{} Unhandled Continuation Tag: {}", self.debug(), tag), }, Token::Level(_) => self.next_token(), - _ => panic!("Unhandled Continuation Token: {:?}", self.current_token), + _ => panic!("{} Unhandled Continuation Token: {:?}", self.debug(), self.current_token), } } value diff --git a/src/types/copyright.rs b/src/types/copyright.rs deleted file mode 100644 index 5f6b129..0000000 --- a/src/types/copyright.rs +++ /dev/null @@ -1,79 +0,0 @@ -#[cfg(feature = "json")] -use serde::{Deserialize, Serialize}; - -use crate::{ - Parser, - tokenizer::{Token, Tokenizer}, -}; - -/// A copyright statement, as appropriate for the copyright laws applicable to this data. -/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#COPR -/// -/// # Example -/// -/// ``` -/// use gedcom::GedcomDocument; -/// -/// let sample = "\ -/// 0 HEAD\n\ -/// 1 GEDC\n\ -/// 2 VERS 5.5\n\ -/// 1 COPR (C) 1997-2000 by H. Eichmann.\n\ -/// 2 CONT You can use and distribute this file freely as long as you do not charge for it.\n\ -/// 0 TRLR"; -/// -/// let mut doc = GedcomDocument::new(sample.chars()); -/// let data = doc.parse_document(); -/// let header = data.header.unwrap(); -/// let copr = header.copyright.unwrap(); -/// -/// assert_eq!(copr.value.unwrap(), "(C) 1997-2000 by H. Eichmann."); -/// assert_eq!( -/// copr.continued.unwrap(), -/// "You can use and distribute this file freely as long as you do not charge for it." 
-/// ); -/// ``` -#[derive(Debug, Default)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct Copyright { - pub value: Option, - /// tag: CONT - pub continued: Option, -} - -impl Copyright { - #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Copyright { - let mut copr = Copyright::default(); - copr.parse(tokenizer, level); - copr - } -} - -impl Parser for Copyright { - /// parse the COPR tag - fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(tokenizer.take_line_value()); - - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" => self.continued = Some(tokenizer.take_line_value()), - "CONC" => self.continued = Some(tokenizer.take_line_value()), - _ => panic!( - "{} unhandled COPR tag in header: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled tag in COPR: {:?}", tokenizer.current_token), - } - } - } -} diff --git a/src/types/date.rs b/src/types/date.rs index ffdd9bb..a7047dd 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -112,8 +112,6 @@ impl Parser for Date { /// 2 DATE 1 APR 1998 /// 3 TIME 12:34:56.789 /// 2 NOTE A note -/// 3 CONT Note continued here. The word TE -/// 3 CONC ST should not be broken! /// 0 TRLR"; /// /// let mut doc = GedcomDocument::new(sample.chars()); @@ -128,7 +126,7 @@ impl Parser for Date { /// assert_eq!(date.time.as_ref().unwrap(), "12:34:56.789"); /// /// let chan_note = chan.note.as_ref().unwrap(); -/// assert_eq!(chan_note.value.as_ref().unwrap(), "A note\nNote continued here. 
The word TEST should not be broken!"); +/// assert_eq!(chan_note.value.as_ref().unwrap(), "A note"); /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] diff --git a/src/types/header.rs b/src/types/header.rs index d6f8d61..5d81c2d 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,7 +1,7 @@ use crate::{ Parser, tokenizer::{Token, Tokenizer}, - types::{Copyright, Corporation, Date, Note}, + types::{Corporation, Date, Note}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -64,7 +64,7 @@ pub struct Header { /// tag: SUBN pub submission_tag: Option, /// tag: COPR - pub copyright: Option, + pub copyright: Option, /// tag: LANG (HEAD-LANG), a default language which may be used to interpret any Text-typed /// payloads that lack a specific language tag from a LANG structure. An application may choose /// to use a different default based on its knowledge of the language preferences of the user. @@ -105,18 +105,18 @@ impl Parser for Header { while tokenizer.current_token != Token::Level(level) { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "GEDC" => self.gedcom = Some(GedcomMeta::new(tokenizer, 1)), - "SOUR" => self.source = Some(HeadSour::new(tokenizer, 1)), + "GEDC" => self.gedcom = Some(GedcomMeta::new(tokenizer, level + 1)), + "SOUR" => self.source = Some(HeadSour::new(tokenizer, level + 1)), "DEST" => self.destination = Some(tokenizer.take_line_value()), - "DATE" => self.date = Some(Date::new(tokenizer, 1)), + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), "SUBM" => self.submitter_tag = Some(tokenizer.take_line_value()), "SUBN" => self.submission_tag = Some(tokenizer.take_line_value()), "FILE" => self.filename = Some(tokenizer.take_line_value()), - "COPR" => self.copyright = Some(Copyright::new(tokenizer, 1)), - "CHAR" => self.encoding = Some(Encoding::new(tokenizer, 1)), + "COPR" => self.copyright = Some(tokenizer.take_continued_text(level + 1)), + 
"CHAR" => self.encoding = Some(Encoding::new(tokenizer, level + 1)), "LANG" => self.language = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, 1)), - "PLAC" => self.place = Some(HeadPlac::new(tokenizer, 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(HeadPlac::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Header Tag: {}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { @@ -385,7 +385,7 @@ impl Parser for HeadSour { /// assert_eq!(sour_data.value.unwrap(), "Name of source data"); /// assert_eq!(sour_data.date.unwrap().value.unwrap(), "1 JAN 1998"); /// assert_eq!( -/// sour_data.copyright.unwrap().value.unwrap(), +/// sour_data.copyright.unwrap(), /// "Copyright of source data" /// ); /// ``` @@ -396,7 +396,7 @@ pub struct HeadSourData { /// tag: DATE pub date: Option, /// tag: COPR - pub copyright: Option, + pub copyright: Option, } impl HeadSourData { @@ -422,7 +422,7 @@ impl Parser for HeadSourData { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "COPR" => self.copyright = Some(Copyright::new(tokenizer, level + 1)), + "COPR" => self.copyright = Some(tokenizer.take_continued_text(level+1)), _ => panic!( "{} unhandled DATA tag in header: {}", tokenizer.debug(), diff --git a/src/types/individual.rs b/src/types/individual.rs index b043c75..79d8940 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,9 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref}, + types::{ + event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref, + }, + Parser, }; #[cfg(feature = "json")] @@ -13,7 +15,7 @@ use serde::{Deserialize, Serialize}; pub struct Individual { pub xref: Option, pub name: Option, - pub sex: Gender, + pub sex: Option, pub 
families: Vec, pub custom_data: Vec, pub source: Vec, @@ -28,7 +30,7 @@ impl Individual { let mut indi = Individual { xref, name: None, - sex: Gender::Unknown, + sex: None, events: Vec::new(), families: Vec::new(), custom_data: Vec::new(), @@ -85,7 +87,7 @@ impl Parser for Individual { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "NAME" => self.name = Some(Name::new(tokenizer, level + 1)), - "SEX" => self.sex = Gender::new(tokenizer, level + 1), + "SEX" => self.sex = Some(Gender::new(tokenizer, level + 1)), "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" @@ -123,42 +125,106 @@ impl Parser for Individual { } } -/// Gender of an `Individual` +/// GenderType is a set of enumerated values that indicate the sex of an individual at birth. See +/// 5.5 specification, p. 61; https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#SEX #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub enum Gender { +pub enum GenderType { + /// Tag 'X' Male, + /// TAG 'M' Female, + /// Tag 'X'; "Does not fit the typical definition of only Male or only Female" Nonbinary, + /// Tag 'U'; "Cannot be determined from available sources" Unknown, } +impl GenderType { + pub fn get_str(&self) -> &str { + match self { + GenderType::Male => "M", + GenderType::Female => "F", + GenderType::Nonbinary => "X", + GenderType::Unknown => "U", + } + } +} + +/// Gender (tag: SEX); This can describe an individual’s reproductive or sexual anatomy at birth. +/// Related concepts of gender identity or sexual preference are not currently given their own tag. +/// Cultural or personal gender preference may be indicated using the FACT tag. 
See +/// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#SEX +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 SEX M +/// 2 FACT A fact about an individual's gen +/// 3 CONC der +/// 2 SOUR @CITATION1@ +/// 3 PAGE Page +/// 4 CONC : 132 +/// 3 _MYOWNTAG This is a non-standard tag. Not recommended but allowed +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let sex = data.individuals[0].sex.as_ref().unwrap(); +/// assert_eq!(sex.value.get_str(), "M"); +/// assert_eq!(sex.fact.as_ref().unwrap(), "A fact about an individual's gender"); +/// assert_eq!(sex.sources[0].xref, "@CITATION1@"); +/// assert_eq!(sex.sources[0].page.as_ref().unwrap(), "Page: 132"); +/// assert_eq!(sex.sources[0].custom_data[0].tag, "_MYOWNTAG"); +/// assert_eq!(sex.sources[0].custom_data[0].value, "This is a non-standard tag. 
Not recommended but allowed"); +/// ``` +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct Gender { + pub value: GenderType, + pub fact: Option, + pub sources: Vec, + pub custom_data: Vec, +} + impl Gender { pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Gender { - let mut gender = Gender::Unknown; - gender.parse(tokenizer, level); - gender + let mut sex = Gender { + value: GenderType::Unknown, + fact: None, + sources: Vec::new(), + custom_data: Vec::new(), + }; + sex.parse(tokenizer, level); + sex + } + + pub fn add_source_citation(&mut self, sour: SourceCitation) { + self.sources.push(sour); } - pub fn get_gender(&self) -> &str { - match &self { - Gender::Male => "M", - Gender::Female => "F", - Gender::Nonbinary => "N", - Gender::Unknown => "U", - } + pub fn add_custom_data(&mut self, data: UserDefinedData) { + self.custom_data.push(data) } } impl Parser for Gender { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); + if let Token::LineValue(gender_string) = &tokenizer.current_token { - *self = match gender_string.as_str() { - "M" => Gender::Male, - "F" => Gender::Female, - "N" => Gender::Nonbinary, - "U" => Gender::Unknown, + self.value = match gender_string.as_str() { + "M" => GenderType::Male, + "F" => GenderType::Female, + "X" => GenderType::Nonbinary, + "U" => GenderType::Unknown, _ => panic!( "{} Unknown gender value {} ({})", tokenizer.debug(), @@ -166,13 +232,34 @@ impl Parser for Gender { level ), }; - } else { - panic!( - "Expected gender LineValue, found {:?}", - tokenizer.current_token - ); + tokenizer.next_token(); + } + + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "FACT" => self.fact = Some(tokenizer.take_continued_text(level + 1)), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + _ => 
panic!("{}, Unhandled Gender tag: {}", tokenizer.debug(), tag), + }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + } + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{}, Unhandled Gender token: {:?}", + tokenizer.debug(), + tokenizer.current_token + ), + } } - tokenizer.next_token(); } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 0450fa9..7d6007a 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -46,9 +46,6 @@ pub use translation::*; mod repository; pub use repository::*; -mod copyright; -pub use copyright::*; - mod corporation; pub use corporation::*; diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index fbb93f6..51a9ccd 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -1,14 +1,9 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, - types::{Note, SourceCitation, Xref}, + types::{ChangeDate, Note, SourceCitation, Xref}, + Parser, }; -use super::ChangeDate; - -#[derive(Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] - /// MultimediaRecord refers to 1 or more external digital files, and may provide some /// additional information about the files and the media they encode. /// @@ -49,6 +44,8 @@ use super::ChangeDate; /// let rin = obje.automated_record_id.as_ref().unwrap(); /// assert_eq!(rin, "Automated Id"); /// ``` +#[derive(Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaRecord { /// Optional reference to link to this submitter pub xref: Option, @@ -289,7 +286,6 @@ impl Parser for MultimediaFileRefn { } } - /// MultimediaFormat indicates the format of the multimedia data associated with the specific /// GEDCOM context. This allows processors to determine whether they can process the data object. 
/// Any linked files should contain the data required, in the indicated format, to process the file @@ -351,7 +347,11 @@ impl Parser for MultimediaFormat { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "TYPE" => self.source_media_type = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled MultimediaFormat Tag: {}", tokenizer.debug(), tag), + _ => panic!( + "{} Unhandled MultimediaFormat Tag: {}", + tokenizer.debug(), + tag + ), }, _ => panic!( "Unhandled MultimediaFormat Token: {:?}", diff --git a/src/types/note.rs b/src/types/note.rs index 4a34669..9b21008 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -1,7 +1,7 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, types::{Source, Translation}, + Parser, }; #[cfg(feature = "json")] @@ -81,10 +81,7 @@ impl Note { impl Parser for Note { /// parse handles the NOTE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - let mut value = String::new(); - - value.push_str(&tokenizer.take_line_value()); - + self.value = Some(tokenizer.take_continued_text(level)); loop { if let Token::Level(cur_level) = tokenizer.current_token { if cur_level <= level { @@ -97,19 +94,11 @@ impl Parser for Note { "MIME" => self.mime = Some(tokenizer.take_line_value()), "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), "LANG" => self.language = Some(tokenizer.take_line_value()), - "CONC" => value.push_str(&tokenizer.take_line_value()), - "CONT" => { - value.push('\n'); - value.push_str(&tokenizer.take_line_value()); - } _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), } } - if value != "" { - self.value = Some(value); - } } } diff --git a/src/types/source.rs b/src/types/source.rs index 7a0229a..e190aee 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -154,7 +154,7 @@ impl Parser for SourceCitation { match 
&tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "PAGE" => self.page = Some(tokenizer.take_line_value()), + "PAGE" => self.page = Some(tokenizer.take_continued_text(level + 1)), "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), "QUAY" => { From d222adcd1d614a247aba68a10a0bfb0e2d4e79a0 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 26 Nov 2022 23:46:18 -0600 Subject: [PATCH 39/55] Add Cremation event type --- src/types/event.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/types/event.rs b/src/types/event.rs index c477938..5dd9ce9 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -18,6 +18,7 @@ pub enum EventType { Death, Christening, Marriage, + Cremation, Residence, SourceData(String), @@ -73,9 +74,10 @@ impl Event { "CHR" => EventType::Christening, "DEAT" => EventType::Death, "MARR" => EventType::Marriage, + "CREM" => EventType::Cremation, "RESI" => EventType::Residence, "OTHER" => EventType::Other, - _ => panic!("Unrecognized event tag: {}", tag), + _ => panic!("Unrecognized EventType tag: {}", tag), } } From d67b09589bd13a54f285ad7ed7a161f31eac085c Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 27 Nov 2022 14:22:52 -0600 Subject: [PATCH 40/55] Modify FamilyLink dataset --- .gitignore | 2 - src/lib.rs | 24 +++--- src/types/event.rs | 7 +- src/types/family.rs | 83 +----------------- src/types/individual.rs | 182 ++++++++++++++++++++++++++++++++++++---- 5 files changed, 185 insertions(+), 113 deletions(-) diff --git a/.gitignore b/.gitignore index c97fb41..0592392 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,2 @@ /target .DS_Store -/tests/tmp.rs -/tests/fixtures/tmp.ged diff --git a/src/lib.rs b/src/lib.rs index bdb778e..d5120ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,9 +44,9 @@ use types::{ /// ```rust /// use gedcom::GedcomDocument; /// let sample = "\ -/// 0 HEAD -/// 1 GEDC -/// 2 VERS 5.5 +/// 0 
HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ /// 0 TRLR"; /// /// let mut doc = GedcomDocument::new(sample.chars()); @@ -88,16 +88,16 @@ pub trait Parser { /// ```rust /// use gedcom::GedcomDocument; /// let sample = "\ -/// 0 HEAD -/// 1 GEDC -/// 2 VERS 5.5 -/// 0 @SUBMITTER@ SUBM -/// 0 @PERSON1@ INDI -/// 0 @FAMILY1@ FAM -/// 0 @R1@ REPO -/// 0 @SOURCE1@ SOUR +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @SUBMITTER@ SUBM\n\ +/// 0 @PERSON1@ INDI\n\ +/// 0 @FAMILY1@ FAM\n\ +/// 0 @R1@ REPO\n\ +/// 0 @SOURCE1@ SOUR\n\ /// 0 @MEDIA1@ OBJE\n\ -/// 0 _MYOWNTAG This is a non-standard tag. Not recommended but allowed +/// 0 _MYOWNTAG This is a non-standard tag. Not recommended but allowed\n\ /// 0 TRLR"; /// /// let mut doc = GedcomDocument::new(sample.chars()); diff --git a/src/types/event.rs b/src/types/event.rs index 5dd9ce9..51dde24 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,6 +1,6 @@ use crate::{ tokenizer::{Token, Tokenizer}, - types::{ChildToFamilyLink, Date, Note, SourceCitation}, + types::{Date, FamilyLink, Note, SourceCitation}, Parser, }; @@ -41,7 +41,7 @@ pub struct Event { pub date: Option, pub place: Option, pub note: Option, - pub child_to_family_link: Option, + pub child_to_family_link: Option, pub citations: Vec, } @@ -152,8 +152,9 @@ impl Parser for Event { "PLAC" => self.place = Some(tokenizer.take_line_value()), "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), "FAMC" => { + let tag_clone = tag.clone(); self.child_to_family_link = - Some(ChildToFamilyLink::new(tokenizer, level + 1)) + Some(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())) } "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), diff --git a/src/types/family.rs b/src/types/family.rs index 2e48bd8..9ab517e 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,7 +1,7 @@ use crate::{ Parser, tokenizer::{Token, Tokenizer}, - 
types::{event::HasEvents, Event, Note}, + types::{event::HasEvents, Event}, }; #[cfg(feature = "json")] @@ -96,84 +96,3 @@ impl HasEvents for Family { self.events.clone() } } - -/// ChildToFamilyLink ...TODO -/// -/// # Example -/// -/// ``` -/// use gedcom::GedcomDocument; -/// let sample = "\ -/// 0 HEAD\n\ -/// 1 GEDC\n\ -/// 2 VERS 5.5\n\ -/// 2 FORM LINEAGE-LINKED\n\ -/// 0 @PERSON1@ INDI\n\ -/// 1 NAME given name /surname/jr. -/// 1 BIRT -/// 2 DATE 31 DEC 1997 -/// 2 PLAC The place -/// 2 FAMC @PARENTS@ -/// 0 TRLR"; -/// -/// let mut ged = GedcomDocument::new(sample.chars()); -/// let data = ged.parse_document(); -/// -/// assert_eq!(data.individuals[0].events[0].child_to_family_link.as_ref().unwrap().xref.as_ref().unwrap(), "@PARENTS@"); -/// -/// ``` -#[derive(Clone, Debug)] -#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct ChildToFamilyLink { - pub xref: Option, - pub pedigree_linkage_type: Option, - pub child_linkage_status: Option, - pub note: Option, -} - -impl ChildToFamilyLink { - pub fn new(tokenizer: &mut Tokenizer, level: u8) -> ChildToFamilyLink { - let mut famc = ChildToFamilyLink { - xref: None, - pedigree_linkage_type: None, - child_linkage_status: None, - note: None, - }; - famc.parse(tokenizer, level); - famc - } -} - -impl Parser for ChildToFamilyLink { - fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.xref = Some(tokenizer.take_line_value()); - - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - - tokenizer.next_token(); - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PEDI" => self.pedigree_linkage_type = Some(tokenizer.take_line_value()), - "STAT" => self.child_linkage_status = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - _ => panic!( - "{} unhandled ChildToFamilyLink tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => 
tokenizer.next_token(), - _ => panic!( - "Unhandled ChildToFamilyLink Token: {:?}", - tokenizer.current_token - ), - } - } - } -} diff --git a/src/types/individual.rs b/src/types/individual.rs index 79d8940..4b58fc9 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -44,9 +44,9 @@ impl Individual { pub fn add_family(&mut self, link: FamilyLink) { let mut do_add = true; - let xref = &link.0; - for FamilyLink(family, _, _) in &self.families { - if family.as_str() == xref.as_str() { + let xref = &link.xref; + for family in &self.families { + if family.xref.as_str() == xref.as_str() { do_add = false; } } @@ -130,9 +130,9 @@ impl Parser for Individual { #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub enum GenderType { - /// Tag 'X' + /// Tag 'M' Male, - /// TAG 'M' + /// TAG 'F' Female, /// Tag 'X'; "Does not fit the typical definition of only Male or only Female" Nonbinary, @@ -263,25 +263,144 @@ impl Parser for Gender { } } -#[derive(Debug)] +/// FamilyLinkType is a code used to indicates whether a family link is a pointer to a family +/// where this person is a child (FAMC tag), or it is pointer to a family where this person is a +/// spouse or parent (FAMS tag). See GEDCOM 5.5 spec, page 26. +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -enum FamilyLinkType { +pub enum FamilyLinkType { Spouse, Child, } -#[derive(Debug)] +impl FamilyLinkType { + pub fn get_str(&self) -> &str { + match self { + FamilyLinkType::Child => "FAMC", + FamilyLinkType::Spouse => "FAMS", + } + } +} + +/// Pedigree is a code used to indicate the child to family relationship for pedigree navigation +/// purposes. See GEDCOM 5.5 spec, page 57. +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -enum Pedigree { +pub enum Pedigree { + /// Adopted indicates adoptive parents. Adopted, + /// Birth indicates birth parents. 
Birth, + /// Foster indicates child was included in a foster or guardian family. Foster, + /// Sealing indicates child was sealed to parents other than birth parents. Sealing, } -#[derive(Debug)] +impl Pedigree { + pub fn get_str(&self) -> &str { + match self { + Pedigree::Birth => "birth", + Pedigree::Foster => "foster", + Pedigree::Adopted => "adopted", + Pedigree::Sealing => "sealing", + } + } +} + +/// ChildLinkStatus is a A status code that allows passing on the users opinion of the status of a +/// child to family link. See GEDCOM 5.5 spec, page 44. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub enum ChildLinkStatus { + /// Challenged indicates linking this child to this family is suspect, but the linkage has been + /// neither proven nor disproven. + Challenged, + /// Disproven indicates there has been a claim by some that this child belongs to this family, + /// but the linkage has been disproven. + Disproven, + /// Proven indicates there has been a claim by some that this child does not belong to this + /// family, but the linkage has been proven. + Proven, +} + +impl ChildLinkStatus { + pub fn get_str(&self) -> &str { + match self { + ChildLinkStatus::Proven => "proven", + ChildLinkStatus::Disproven => "disproven", + ChildLinkStatus::Challenged => "challenged", + } + } +} + +/// AdoptedByWhichParent is a code which shows which parent in the associated family record adopted +/// this person. See GEDCOM 5.5 spec, page 42. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub enum AdoptedByWhichParent { + /// The HUSBand in the associated family adopted this person. + Husband, + /// The WIFE in the associated family adopted this person. + Wife, + /// Both HUSBand and WIFE adopted this person. 
+ Both, +} + +impl AdoptedByWhichParent { + pub fn get_str(&self) -> &str { + match self { + AdoptedByWhichParent::Wife => "WIFE", + AdoptedByWhichParent::Husband => "HUSB", + AdoptedByWhichParent::Both => "BOTH", + } + } +} + +/// FamilyLink indicates the normal lineage links through the use of pointers from the individual +/// to a family through either the FAMC tag or the FAMS tag. The FAMC tag provides a pointer to a +/// family where this person is a child. The FAMS tag provides a pointer to a family where this +/// person is a spouse or parent. See GEDCOM 5.5 spec, page 26. +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 NAME given name\n\ +/// 1 SEX M\n\ +/// 1 ADOP\n\ +/// 2 DATE CAL 31 DEC 1897\n\ +/// 2 FAMC @ADOPTIVE_PARENTS@\n\ +/// 3 PEDI adopted +/// 3 ADOP BOTH\n\ +/// 3 STAT proven +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let famc = data.individuals[0].events[0].child_to_family_link.as_ref().unwrap(); +/// assert_eq!(famc.xref, "@ADOPTIVE_PARENTS@"); +/// assert_eq!(famc.family_link_type.get_str(), "FAMC"); +/// assert_eq!(famc.pedigree_linkage_type.as_ref().unwrap().get_str(), "adopted"); +/// assert_eq!(famc.child_linkage_status.as_ref().unwrap().get_str(), "proven"); +/// assert_eq!(famc.adopted_by.as_ref().unwrap().get_str(), "BOTH"); +/// ``` +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct FamilyLink(Xref, FamilyLinkType, Option); +pub struct FamilyLink { + pub xref: Xref, + pub family_link_type: FamilyLinkType, + pub pedigree_linkage_type: Option, + pub child_linkage_status: Option, + pub adopted_by: Option, + pub note: Option, +} impl FamilyLink { #[must_use] @@ -292,20 +411,51 @@ impl FamilyLink { "FAMS" => FamilyLinkType::Spouse, _ => panic!("Unrecognized family type tag: {}", tag), }; - 
let mut family_link = FamilyLink(xref, link_type, None); + let mut family_link = FamilyLink { + xref, + family_link_type: link_type, + pedigree_linkage_type: None, + child_linkage_status: None, + adopted_by: None, + note: None, + }; family_link.parse(tokenizer, level); family_link } pub fn set_pedigree(&mut self, pedigree_text: &str) { - self.2 = match pedigree_text.to_lowercase().as_str() { + self.pedigree_linkage_type = match pedigree_text.to_lowercase().as_str() { "adopted" => Some(Pedigree::Adopted), "birth" => Some(Pedigree::Birth), "foster" => Some(Pedigree::Foster), "sealing" => Some(Pedigree::Sealing), - _ => panic!("Unrecognized family link pedigree: {}", pedigree_text), + _ => panic!("Unrecognized FamilyLink.pedigree code: {}", pedigree_text), }; } + + pub fn set_child_linkage_status(&mut self, status_text: &str) { + self.child_linkage_status = match status_text.to_lowercase().as_str() { + "challenged" => Some(ChildLinkStatus::Challenged), + "disproven" => Some(ChildLinkStatus::Disproven), + "proven" => Some(ChildLinkStatus::Proven), + _ => panic!( + "Unrecognized FamilyLink.child_linkage_status code: {}", + status_text + ), + } + } + + pub fn set_adopted_by_which_parent(&mut self, adopted_by_text: &str) { + self.adopted_by = match adopted_by_text.to_lowercase().as_str() { + "husb" => Some(AdoptedByWhichParent::Husband), + "wife" => Some(AdoptedByWhichParent::Wife), + "both" => Some(AdoptedByWhichParent::Both), + _ => panic!( + "Unrecognized FamilyLink.adopted_by code: {}", + adopted_by_text + ), + } + } } impl Parser for FamilyLink { @@ -319,6 +469,10 @@ impl Parser for FamilyLink { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "PEDI" => self.set_pedigree(tokenizer.take_line_value().as_str()), + "STAT" => self.set_child_linkage_status(&tokenizer.take_line_value().as_str()), + "ADOP" => { + self.set_adopted_by_which_parent(&tokenizer.take_line_value().as_str()) + } _ => panic!("{} Unhandled FamilyLink Tag: {}", 
tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), From 7ff27b5d209c3496906651212a84e96dac9b2bff Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 27 Nov 2022 20:11:46 -0600 Subject: [PATCH 41/55] Add handling for Submission Record --- README.md | 5 +- src/lib.rs | 13 ++++- src/tokenizer.rs | 8 +-- src/types/mod.rs | 3 + src/types/submission.rs | 121 ++++++++++++++++++++++++++++++++++++++++ src/types/submitter.rs | 4 +- 6 files changed, 142 insertions(+), 12 deletions(-) create mode 100644 src/types/submission.rs diff --git a/README.md b/README.md index ce96236..e77360c 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Later specifications, such as [5.5.2](https://jfcardinal.github.io/GEDCOM-5.5.2/ ## Usage -This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for my testing & development. It prints the `GedcomData` object and some stats about the gedcom file passed into it: +This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for my testing & development. It prints the `GedcomData` object and some stats about the GEDCOM file passed into it: ```bash parse_gedcom ./tests/fixtures/sample.ged @@ -27,6 +27,7 @@ parse_gedcom ./tests/fixtures/sample.ged # ---------------------- # | Gedcom Data Stats: | # ---------------------- +# submissions: 0 # submitters: 1 # individuals: 3 # families: 2 @@ -58,8 +59,6 @@ Here are some notes about parsed data & tags. Page references are to the [Gedcom ### Top-level tags -* `SUBMISSION_RECORD` - p.28 - No attempt at handling this is made. - Tags for families (`FAM`), individuals (`IND`), repositories (`REPO`), sources (`SOUR`), and submitters (`SUBM`) are handled. Many of the most common sub-tags for these are handled though some may not yet be parsed. Mileage may vary. 
## License diff --git a/src/lib.rs b/src/lib.rs index d5120ef..3540850 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,10 +31,10 @@ use tokenizer::{Token, Tokenizer}; pub mod types; use types::{ - UserDefinedData, Family, Header, Individual, MultimediaRecord, Repository, Source, Submitter, + Family, Header, Individual, MultimediaRecord, Repository, Source, SubmissionRecord, Submitter, + UserDefinedData, }; - /// The GedcomDocument can convert the token list into a data structure. The order of the Dataset /// should be as follows: the HEAD must come first and TRLR must be last, with any RECORDs in /// between. @@ -129,6 +129,8 @@ pub struct GedcomData { pub header: Option
, /// List of submitters of the facts pub submitters: Vec, + /// List of submission records + pub submissions: Vec, /// Individuals within the family tree pub individuals: Vec, /// The family units of the tree, representing relationships between individuals @@ -176,6 +178,11 @@ impl GedcomData { self.sources.push(source); } + /// Add a `Submission` to the tree + pub fn add_submission(&mut self, submission: SubmissionRecord) { + self.submissions.push(submission); + } + /// Adds a `Submitter` to the tree pub fn add_submitter(&mut self, submitter: Submitter) { self.submitters.push(submitter); @@ -196,6 +203,7 @@ impl GedcomData { println!("----------------------"); println!("| Gedcom Data Stats: |"); println!("----------------------"); + println!(" submissions: {}", self.submissions.len()); println!(" submitters: {}", self.submitters.len()); println!(" individuals: {}", self.individuals.len()); println!(" families: {}", self.families.len()); @@ -238,6 +246,7 @@ impl Parser for GedcomData { self.add_repository(Repository::new(tokenizer, current_level, pointer)) } "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)), + "SUBN" => self.add_submission(SubmissionRecord::new(tokenizer, level, pointer)), "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)), "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level, pointer)), "TRLR" => break, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index adb123a..115a643 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -3,10 +3,10 @@ use std::str::Chars; use crate::types::UserDefinedData; -/// The base enum of Token types -/// -/// making use of [GEDCOM Standard Release 5.5.1](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf), p.11 -/// `gedcom_line: level + delim + [optional_xref_ID] + tag + [optional_line_value] + terminator` +/// The base enum of Token types making use of +/// [GEDCOM Standard Release 
5.5.1](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf), +/// p.11 `gedcom_line: level + delim + [optional_xref_ID] + tag + [optional_line_value] + +/// terminator` #[derive(Clone, Debug, PartialEq)] pub enum Token { /// The `level`, denoting the depth within the tree diff --git a/src/types/mod.rs b/src/types/mod.rs index 7d6007a..39f12cd 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -31,6 +31,9 @@ pub use individual::*; mod family; pub use family::*; +mod submission; +pub use submission::*; + mod submitter; pub use submitter::*; diff --git a/src/types/submission.rs b/src/types/submission.rs new file mode 100644 index 0000000..62bf098 --- /dev/null +++ b/src/types/submission.rs @@ -0,0 +1,121 @@ +use crate::{ + tokenizer::{Token, Tokenizer}, + types::{ChangeDate, Note, UserDefinedData, Xref}, + Parser, +}; + +#[cfg(feature = "json")] +use serde::{Deserialize, Serialize}; + +/// SubmissionRecord is used by the sending system to send instructions and information to the +/// receiving system. The sending system uses a submission record to send instructions and +/// information to the receiving system. TempleReady processes submission records to determine +/// which temple the cleared records should be directed to. The submission record is also used for +/// communication between Ancestral File download requests and TempleReady. Each GEDCOM +/// transmission file should have only one submission record. Multiple submissions are handled by +/// creating separate GEDCOM transmission files. See GEDCOM 5.5 spec, page 28. +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @SUBMISSION@ SUBN +/// 1 _MYOWNTAG SUBN does not allow NOTE tags :-(( so, here is my not: SUBN seems to be LDS internal data. The sample data I put in here are probably nonsence. 
+/// 1 SUBM @SUBMITTER@ +/// 1 FAMF NameOfFamilyFile +/// 1 TEMP Abreviated temple code +/// 1 ANCE 1 +/// 1 DESC 1 +/// 1 ORDI yes +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// ``` +#[derive(Debug, Default)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct SubmissionRecord { + pub xref: Option, + pub name_of_family_file: Option, + pub temple_code: Option, + pub submitter_link: Option, + pub generations_of_ancestors: Option, + pub generations_of_descendants: Option, + pub ordinance_process_flag: Option, + pub automated_record_id: Option, + pub note: Option, + pub change_date: Option, + pub custom_data: Vec, +} + +impl SubmissionRecord { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> SubmissionRecord { + let mut subn = SubmissionRecord { + xref, + name_of_family_file: None, + submitter_link: None, + generations_of_ancestors: None, + generations_of_descendants: None, + ordinance_process_flag: None, + automated_record_id: None, + temple_code: None, + note: None, + change_date: None, + custom_data: Vec::new(), + }; + subn.parse(tokenizer, level); + subn + } + + pub fn add_custom_data(&mut self, data: UserDefinedData) { + self.custom_data.push(data) + } +} + +impl Parser for SubmissionRecord { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + tokenizer.next_token(); + + loop { + if let Token::Level(cur_level) = &tokenizer.current_token { + if cur_level <= &level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "ANCE" => self.generations_of_ancestors = Some(tokenizer.take_line_value()), + "DATE" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "DESC" => self.generations_of_descendants = Some(tokenizer.take_line_value()), + "FAMF" => self.name_of_family_file = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + 
"ORDI" => self.ordinance_process_flag = Some(tokenizer.take_line_value()), + "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()), + "SUBM" => self.submitter_link = Some(tokenizer.take_line_value()), + "TEMP" => self.temple_code = Some(tokenizer.take_line_value()), + _ => panic!( + "{}, Unhandled SubmissionRecord tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + } + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{}, Unhandled SubmissionRecord: {:?}", + tokenizer.debug(), + &tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 4c09c72..15755fd 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,14 +1,12 @@ use crate::{ Parser, tokenizer::{Token, Tokenizer}, - types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note}, + types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note, Xref}, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -type Xref = String; - /// The submitter record identifies an individual or organization that contributed information /// contained in the GEDCOM transmission. 
All records in the transmission are assumed to be /// submitted by the SUBMITTER referenced in the HEADer, unless a SUBMitter reference inside a From 88bebe213344e5684fd02ed4e6fb7c515799d5fb Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sun, 27 Nov 2022 22:11:18 -0600 Subject: [PATCH 42/55] Modify event, individual, family, and source --- src/types/event.rs | 135 ++++++++++++++++++++++++++++++++-------- src/types/family.rs | 10 +-- src/types/individual.rs | 76 +++++++++------------- src/types/mod.rs | 2 +- src/types/source.rs | 8 +-- 5 files changed, 148 insertions(+), 83 deletions(-) diff --git a/src/types/event.rs b/src/types/event.rs index 51dde24..c076bcd 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -11,50 +11,113 @@ use std::{fmt, string::ToString}; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub enum EventType { +pub enum Event { Adoption, + AdultChristening, + Baptism, + BarMitzvah, + BasMitzvah, Birth, + Blessing, Burial, - Death, + Census, Christening, - Marriage, + Confirmation, Cremation, + Death, + Emigration, + Event, + FirstCommunion, + Graduation, + Immigration, + Marriage, + Naturalization, + Ordination, + Probate, + Probjate, Residence, - SourceData(String), - + Retired, + Will, // "Other" is used to construct an event without requiring an explicit event type Other, + SourceData(String), } -impl ToString for EventType { +impl ToString for Event { fn to_string(&self) -> String { format!("{:?}", self) } } -/// Event fact +/// EventDetail is a thing that happens on a specific date. Use the date form 'BET date AND date' +/// to indicate that an event took place at some time between two dates. Resist the temptation to +/// use a 'FROM date TO date' form in an event structure. If the subject of your recording occurred +/// over a period of time, then it is probably not an event, but rather an attribute or fact. 
The +/// EVEN tag in this structure is for recording general events that are not specified in the +/// specification. The event indicated by this general EVEN tag is defined by the value of the +/// subordinate TYPE tag (event_type). +/// +/// # A Minimal Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI +/// 1 CENS\n\ +/// 2 DATE 31 DEC 1997\n\ +/// 2 PLAC The place\n\ +/// 2 SOUR @SOURCE1@\n\ +/// 3 PAGE 42\n\ +/// 3 DATA\n\ +/// 4 DATE 31 DEC 1900\n\ +/// 4 TEXT a sample text\n\ +/// 5 CONT Sample text continued here. The word TE\n\ +/// 5 CONC ST should not be broken!\n\ +/// 3 QUAY 3\n\ +/// 3 NOTE A note\n\ +/// 4 CONT Note continued here. The word TE\n\ +/// 4 CONC ST should not be broken!\n\ +/// 2 NOTE CENSUS event note (the event of the periodic count of the population for a designated locality, such as a national or state Census)\n\ +/// 3 CONT Note continued here. The word TE\n\ +/// 3 CONC ST should not be broken!\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let event = data.individuals[0].events[0].event.to_string(); +/// assert_eq!(event, "Census"); +/// ``` #[derive(Clone)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct Event { - pub event: EventType, +pub struct EventDetail { + pub event: Event, pub value: Option, pub date: Option, pub place: Option, pub note: Option, pub child_to_family_link: Option, + /// event_type handles the TYPE tag, a descriptive word or phrase used to further classify the + /// parent event or attribute tag. This should be used whenever either of the generic EVEN or + /// FACT tags are used. T. See GEDCOM 5.5 spec, page 35 and 49. 
+ pub event_type: Option, pub citations: Vec, } -impl Event { +impl EventDetail { #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> Event { - let mut event = Event { + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> EventDetail { + let mut event = EventDetail { event: Self::from_tag(tag), value: None, date: None, place: None, note: None, child_to_family_link: None, + event_type: None, citations: Vec::new(), }; event.parse(tokenizer, level); @@ -63,20 +126,37 @@ impl Event { /** converts an event to be of type `SourceData` with `value` as the data */ pub fn with_source_data(&mut self, value: String) { - self.event = EventType::SourceData(value); + self.event = Event::SourceData(value); } - pub fn from_tag(tag: &str) -> EventType { + pub fn from_tag(tag: &str) -> Event { match tag { - "ADOP" => EventType::Adoption, - "BIRT" => EventType::Birth, - "BURI" => EventType::Burial, - "CHR" => EventType::Christening, - "DEAT" => EventType::Death, - "MARR" => EventType::Marriage, - "CREM" => EventType::Cremation, - "RESI" => EventType::Residence, - "OTHER" => EventType::Other, + "ADOP" => Event::Adoption, + "BAPM" => Event::Baptism, + "BARM" => Event::BarMitzvah, + "BASM" => Event::BasMitzvah, + "BIRT" => Event::Birth, + "BLES" => Event::Blessing, + "BURI" => Event::Burial, + "CENS" => Event::Census, + "CHR" => Event::Christening, + "CHRA" => Event::AdultChristening, + "CONF" => Event::Confirmation, + "CREM" => Event::Cremation, + "DEAT" => Event::Death, + "EMIG" => Event::Emigration, + "EVEN" => Event::Event, + "FCOM" => Event::FirstCommunion, + "GRAD" => Event::Graduation, + "IMMI" => Event::Immigration, + "MARR" => Event::Marriage, + "NATU" => Event::Naturalization, + "ORDN" => Event::Ordination, + "PROB" => Event::Probate, + "RESI" => Event::Residence, + "RETI" => Event::Retired, + "WILL" => Event::Will, + "OTHER" => Event::Other, _ => panic!("Unrecognized EventType tag: {}", tag), } } @@ -91,7 +171,7 @@ impl Event { } } -impl 
std::fmt::Debug for Event { +impl std::fmt::Debug for EventDetail { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let event_type = format!("{:?} Event", &self.event); let mut debug = f.debug_struct(&event_type); @@ -105,8 +185,8 @@ impl std::fmt::Debug for Event { /// Trait given to structs representing entities that have events. pub trait HasEvents { - fn add_event(&mut self, event: Event) -> (); - fn events(&self) -> Vec; + fn add_event(&mut self, event: EventDetail) -> (); + fn events(&self) -> Vec; fn dates(&self) -> Vec { let mut dates: Vec = Vec::new(); for event in self.events() { @@ -127,7 +207,7 @@ pub trait HasEvents { } } -impl Parser for Event { +impl Parser for EventDetail { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); @@ -157,6 +237,7 @@ impl Parser for Event { Some(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())) } "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "TYPE" => self.event_type = Some(tokenizer.take_line_value()), _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), }, Token::Level(_) => tokenizer.next_token(), diff --git a/src/types/family.rs b/src/types/family.rs index 9ab517e..1921950 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,7 +1,7 @@ use crate::{ Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, Event}, + types::{event::HasEvents, EventDetail}, }; #[cfg(feature = "json")] @@ -21,7 +21,7 @@ pub struct Family { pub individual2: Option, // mapped from WIFE pub children: Vec, pub num_children: Option, - events: Vec, + events: Vec, } impl Family { @@ -69,7 +69,7 @@ impl Parser for Family { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MARR" => self.add_event(Event::new(tokenizer, level + 1, "MARR")), + "MARR" => self.add_event(EventDetail::new(tokenizer, level + 1, "MARR")), "HUSB" => self.set_individual1(tokenizer.take_line_value()), "WIFE" => 
self.set_individual2(tokenizer.take_line_value()), "CHIL" => self.add_child(tokenizer.take_line_value()), @@ -83,7 +83,7 @@ impl Parser for Family { } impl HasEvents for Family { - fn add_event(&mut self, event: Event) -> () { + fn add_event(&mut self, event: EventDetail) -> () { let event_type = &event.event; for e in &self.events { if &e.event == event_type { @@ -92,7 +92,7 @@ impl HasEvents for Family { } self.events.push(event); } - fn events(&self) -> Vec { + fn events(&self) -> Vec { self.events.clone() } } diff --git a/src/types/individual.rs b/src/types/individual.rs index 4b58fc9..0b4a033 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,7 @@ use crate::{ tokenizer::{Token, Tokenizer}, types::{ - event::HasEvents, Event, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref, + event::HasEvents, EventDetail, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref, }, Parser, }; @@ -20,7 +20,7 @@ pub struct Individual { pub custom_data: Vec, pub source: Vec, pub multimedia: Vec, - pub events: Vec, + pub events: Vec, pub last_updated: Option, } @@ -69,10 +69,10 @@ impl Individual { } impl HasEvents for Individual { - fn add_event(&mut self, event: Event) -> () { + fn add_event(&mut self, event: EventDetail) -> () { self.events.push(event); } - fn events(&self) -> Vec { + fn events(&self) -> Vec { self.events.clone() } } @@ -93,7 +93,7 @@ impl Parser for Individual { | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" | "MARR" => { let tag_clone = tag.clone(); - self.add_event(Event::new(tokenizer, level + 1, tag_clone.as_str())); + self.add_event(EventDetail::new(tokenizer, level + 1, tag_clone.as_str())); } "FAMC" | "FAMS" => { let tag_clone = tag.clone(); @@ -140,17 +140,13 @@ pub enum GenderType { Unknown, } -impl GenderType { - pub fn get_str(&self) -> &str { - match self { - GenderType::Male => "M", - GenderType::Female => "F", - GenderType::Nonbinary => "X", - GenderType::Unknown => "U", - } 
+impl ToString for GenderType { + fn to_string(&self) -> String { + format!("{:?}", self) } } + /// Gender (tag: SEX); This can describe an individual’s reproductive or sexual anatomy at birth. /// Related concepts of gender identity or sexual preference are not currently given their own tag. /// Cultural or personal gender preference may be indicated using the FACT tag. See @@ -178,7 +174,7 @@ impl GenderType { /// let data = doc.parse_document(); /// /// let sex = data.individuals[0].sex.as_ref().unwrap(); -/// assert_eq!(sex.value.get_str(), "M"); +/// assert_eq!(sex.value.to_string(), "Male"); /// assert_eq!(sex.fact.as_ref().unwrap(), "A fact about an individual's gender"); /// assert_eq!(sex.sources[0].xref, "@CITATION1@"); /// assert_eq!(sex.sources[0].page.as_ref().unwrap(), "Page: 132"); @@ -273,15 +269,13 @@ pub enum FamilyLinkType { Child, } -impl FamilyLinkType { - pub fn get_str(&self) -> &str { - match self { - FamilyLinkType::Child => "FAMC", - FamilyLinkType::Spouse => "FAMS", - } +impl ToString for FamilyLinkType { + fn to_string(&self) -> String { + format!("{:?}", self) } } + /// Pedigree is a code used to indicate the child to family relationship for pedigree navigation /// purposes. See GEDCOM 5.5 spec, page 57. #[derive(Clone, Debug)] @@ -297,17 +291,13 @@ pub enum Pedigree { Sealing, } -impl Pedigree { - pub fn get_str(&self) -> &str { - match self { - Pedigree::Birth => "birth", - Pedigree::Foster => "foster", - Pedigree::Adopted => "adopted", - Pedigree::Sealing => "sealing", - } +impl ToString for Pedigree { + fn to_string(&self) -> String { + format!("{:?}", self) } } + /// ChildLinkStatus is a A status code that allows passing on the users opinion of the status of a /// child to family link. See GEDCOM 5.5 spec, page 44. 
#[derive(Clone, Debug)] @@ -324,16 +314,13 @@ pub enum ChildLinkStatus { Proven, } -impl ChildLinkStatus { - pub fn get_str(&self) -> &str { - match self { - ChildLinkStatus::Proven => "proven", - ChildLinkStatus::Disproven => "disproven", - ChildLinkStatus::Challenged => "challenged", - } +impl ToString for ChildLinkStatus { + fn to_string(&self) -> String { + format!("{:?}", self) } } + /// AdoptedByWhichParent is a code which shows which parent in the associated family record adopted /// this person. See GEDCOM 5.5 spec, page 42. #[derive(Clone, Debug)] @@ -347,16 +334,13 @@ pub enum AdoptedByWhichParent { Both, } -impl AdoptedByWhichParent { - pub fn get_str(&self) -> &str { - match self { - AdoptedByWhichParent::Wife => "WIFE", - AdoptedByWhichParent::Husband => "HUSB", - AdoptedByWhichParent::Both => "BOTH", - } +impl ToString for AdoptedByWhichParent { + fn to_string(&self) -> String { + format!("{:?}", self) } } + /// FamilyLink indicates the normal lineage links through the use of pointers from the individual /// to a family through either the FAMC tag or the FAMS tag. The FAMC tag provides a pointer to a /// family where this person is a child. 
The FAMS tag provides a pointer to a family where this @@ -386,10 +370,10 @@ impl AdoptedByWhichParent { /// /// let famc = data.individuals[0].events[0].child_to_family_link.as_ref().unwrap(); /// assert_eq!(famc.xref, "@ADOPTIVE_PARENTS@"); -/// assert_eq!(famc.family_link_type.get_str(), "FAMC"); -/// assert_eq!(famc.pedigree_linkage_type.as_ref().unwrap().get_str(), "adopted"); -/// assert_eq!(famc.child_linkage_status.as_ref().unwrap().get_str(), "proven"); -/// assert_eq!(famc.adopted_by.as_ref().unwrap().get_str(), "BOTH"); +/// assert_eq!(famc.family_link_type.to_string(), "Child"); +/// assert_eq!(famc.pedigree_linkage_type.as_ref().unwrap().to_string(), "Adopted"); +/// assert_eq!(famc.child_linkage_status.as_ref().unwrap().to_string(), "Proven"); +/// assert_eq!(famc.adopted_by.as_ref().unwrap().to_string(), "Both"); /// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] diff --git a/src/types/mod.rs b/src/types/mod.rs index 39f12cd..32368e0 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; pub mod event; -pub use event::{Event, EventType}; +pub use event::{EventDetail, Event}; pub mod date; pub use date::{ChangeDate, Date}; diff --git a/src/types/source.rs b/src/types/source.rs index e190aee..38b6173 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,7 +1,7 @@ use crate::{ Parser, tokenizer::{Token, Tokenizer}, - types::{Date, Event, Note, RepoCitation, UserDefinedData, Xref}, + types::{Date, EventDetail, Note, RepoCitation, UserDefinedData, Xref}, }; #[cfg(feature = "json")] @@ -56,7 +56,7 @@ impl Parser for Source { "DATA" => tokenizer.next_token(), "EVEN" => { let events_recorded = tokenizer.take_line_value(); - let mut event = Event::new(tokenizer, level + 2, "OTHER"); + let mut event = EventDetail::new(tokenizer, level + 2, "OTHER"); event.with_source_data(events_recorded); self.data.add_event(event); } @@ -77,12 +77,12 @@ impl Parser for 
Source { #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct SourceData { - events: Vec, + events: Vec, pub agency: Option, } impl SourceData { - pub fn add_event(&mut self, event: Event) { + pub fn add_event(&mut self, event: EventDetail) { self.events.push(event); } } From 3ba2fcd4e08f92c00dc7917fd4fd21ab7b40369b Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 14:48:33 -0600 Subject: [PATCH 43/55] Remove typo in Event --- src/types/event.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/types/event.rs b/src/types/event.rs index c076bcd..627807c 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -34,7 +34,6 @@ pub enum Event { Naturalization, Ordination, Probate, - Probjate, Residence, Retired, Will, From 6b08f25cb3a9aff1176f0c7ebce2c48a08a510b3 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 14:50:40 -0600 Subject: [PATCH 44/55] Generalize documentation re: SubmissionRecord --- src/types/submission.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/types/submission.rs b/src/types/submission.rs index 62bf098..d40b249 100644 --- a/src/types/submission.rs +++ b/src/types/submission.rs @@ -9,11 +9,10 @@ use serde::{Deserialize, Serialize}; /// SubmissionRecord is used by the sending system to send instructions and information to the /// receiving system. The sending system uses a submission record to send instructions and -/// information to the receiving system. TempleReady processes submission records to determine -/// which temple the cleared records should be directed to. The submission record is also used for -/// communication between Ancestral File download requests and TempleReady. Each GEDCOM -/// transmission file should have only one submission record. Multiple submissions are handled by -/// creating separate GEDCOM transmission files. See GEDCOM 5.5 spec, page 28. +/// information to the receiving system. 
The submission record is also used for communication +/// between Ancestral File download requests and TempleReady. Each GEDCOM transmission file should +/// have only one submission record. Multiple submissions are handled by creating separate GEDCOM +/// transmission files. See GEDCOM 5.5 spec, page 28. /// /// # Example /// From 95402606248f5af8006ff05b1ae5e38acbbb3a4f Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 14:51:45 -0600 Subject: [PATCH 45/55] Modify example in SubmissionRecord docs --- src/types/submission.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/types/submission.rs b/src/types/submission.rs index d40b249..aaf300f 100644 --- a/src/types/submission.rs +++ b/src/types/submission.rs @@ -22,14 +22,14 @@ use serde::{Deserialize, Serialize}; /// 0 HEAD\n\ /// 1 GEDC\n\ /// 2 VERS 5.5\n\ -/// 0 @SUBMISSION@ SUBN -/// 1 _MYOWNTAG SUBN does not allow NOTE tags :-(( so, here is my not: SUBN seems to be LDS internal data. The sample data I put in here are probably nonsence. -/// 1 SUBM @SUBMITTER@ -/// 1 FAMF NameOfFamilyFile -/// 1 TEMP Abreviated temple code -/// 1 ANCE 1 -/// 1 DESC 1 -/// 1 ORDI yes +/// 0 @SUBMISSION@ SUBN\n\ +/// 1 _MYOWNTAG SUBN does not allow NOTE tags :-(( so, here is my not: SUBN seems to be LDS internal data. 
The sample data I put in here are probably nonsence.\n\ +/// 1 SUBM @SUBMITTER@\n\ +/// 1 FAMF NameOfFamilyFile\n\ +/// 1 TEMP Abreviated temple code\n\ +/// 1 ANCE 1\n\ +/// 1 DESC 1\n\ +/// 1 ORDI yes\n\ /// 0 TRLR"; /// /// let mut doc = GedcomDocument::new(sample.chars()); From db662d025e914d7c352e99e2345acddba4957df9 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 14:52:47 -0600 Subject: [PATCH 46/55] Clarify docs for Event --- src/types/event.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types/event.rs b/src/types/event.rs index 627807c..c916cf5 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -56,7 +56,7 @@ impl ToString for Event { /// specification. The event indicated by this general EVEN tag is defined by the value of the /// subordinate TYPE tag (event_type). /// -/// # A Minimal Example +/// # Example /// /// ```rust /// use gedcom::GedcomDocument; From 551dcaf7d55db13aaebc7e1eba868c1a0f422319 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 15:01:57 -0600 Subject: [PATCH 47/55] Modify README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e77360c..19ece88 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Later specifications, such as [5.5.2](https://jfcardinal.github.io/GEDCOM-5.5.2/ ## Usage -This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for my testing & development. It prints the `GedcomData` object and some stats about the GEDCOM file passed into it: +This crate comes in two parts. The first is a binary called `parse_gedcom`, mostly used for testing & development. 
It prints the `GedcomData` object and some stats about the GEDCOM file passed into it: ```bash parse_gedcom ./tests/fixtures/sample.ged From 842ef4e4707e9b71bfa558c99c9d2cd94a6a4219 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 17:27:45 -0600 Subject: [PATCH 48/55] Add handler for Individual Attributes --- src/types/individual.rs | 214 ++++++++++++++++++++++++++++++++++++++-- src/types/source.rs | 22 +++-- 2 files changed, 220 insertions(+), 16 deletions(-) diff --git a/src/types/individual.rs b/src/types/individual.rs index 0b4a033..37682e5 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,7 +1,8 @@ use crate::{ tokenizer::{Token, Tokenizer}, types::{ - event::HasEvents, EventDetail, MultimediaRecord, Note, SourceCitation, UserDefinedData, Xref, + event::HasEvents, Date, EventDetail, MultimediaRecord, Note, + SourceCitation, UserDefinedData, Xref, }, Parser, }; @@ -18,9 +19,10 @@ pub struct Individual { pub sex: Option, pub families: Vec, pub custom_data: Vec, + pub attributes: Vec, pub source: Vec, - pub multimedia: Vec, pub events: Vec, + pub multimedia: Vec, pub last_updated: Option, } @@ -33,6 +35,7 @@ impl Individual { sex: None, events: Vec::new(), families: Vec::new(), + attributes: Vec::new(), custom_data: Vec::new(), last_updated: None, source: Vec::new(), @@ -66,6 +69,10 @@ impl Individual { pub fn add_multimedia(&mut self, multimedia: MultimediaRecord) { self.multimedia.push(multimedia); } + + pub fn add_attribute(&mut self, attribute: AttributeDetail) { + self.attributes.push(attribute); + } } impl HasEvents for Individual { @@ -86,6 +93,7 @@ impl Parser for Individual { while tokenizer.current_token != Token::Level(level) { match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { + // TODO handle xref "NAME" => self.name = Some(Name::new(tokenizer, level + 1)), "SEX" => self.sex = Some(Gender::new(tokenizer, level + 1)), "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" @@ -95,6 
+103,16 @@ impl Parser for Individual { let tag_clone = tag.clone(); self.add_event(EventDetail::new(tokenizer, level + 1, tag_clone.as_str())); } + "CAST" | "DSCR" | "EDUC" | "IDNO" | "NATI" | "NCHI" | "NMR" | "OCCU" + | "PROP" | "RELI" | "SSN" | "TITL" | "FACT" => { + // RESI should be an attribute or an event? + let tag_clone = tag.clone(); + self.add_attribute(AttributeDetail::new( + tokenizer, + level + 1, + tag_clone.as_str(), + )); + } "FAMC" | "FAMS" => { let tag_clone = tag.clone(); self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); @@ -108,7 +126,6 @@ impl Parser for Individual { "SOUR" => { self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); } - // TODO handle xref "OBJE" => { self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)) } @@ -146,7 +163,6 @@ impl ToString for GenderType { } } - /// Gender (tag: SEX); This can describe an individual’s reproductive or sexual anatomy at birth. /// Related concepts of gender identity or sexual preference are not currently given their own tag. /// Cultural or personal gender preference may be indicated using the FACT tag. See @@ -275,7 +291,6 @@ impl ToString for FamilyLinkType { } } - /// Pedigree is a code used to indicate the child to family relationship for pedigree navigation /// purposes. See GEDCOM 5.5 spec, page 57. #[derive(Clone, Debug)] @@ -297,7 +312,6 @@ impl ToString for Pedigree { } } - /// ChildLinkStatus is a A status code that allows passing on the users opinion of the status of a /// child to family link. See GEDCOM 5.5 spec, page 44. #[derive(Clone, Debug)] @@ -320,7 +334,6 @@ impl ToString for ChildLinkStatus { } } - /// AdoptedByWhichParent is a code which shows which parent in the associated family record adopted /// this person. See GEDCOM 5.5 spec, page 42. 
#[derive(Clone, Debug)] @@ -340,7 +353,6 @@ impl ToString for AdoptedByWhichParent { } } - /// FamilyLink indicates the normal lineage links through the use of pointers from the individual /// to a family through either the FAMC tag or the FAMS tag. The FAMC tag provides a pointer to a /// family where this person is a child. The FAMS tag provides a pointer to a family where this @@ -527,3 +539,189 @@ impl Parser for Name { } } } + +/// IndividualAttribute indicates other attributes or facts are used to describe an individual's +/// actions, physical description, employment, education, places of residence, etc. These are not +/// generally thought of as events. However, they are often described like events because they were +/// observed at a particular time and/or place. See GEDCOM 5.5 spec, page +/// 33. +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub enum IndividualAttribute { + CastName, + PhysicalDescription, + ScholasticAchievement, + NationalIDNumber, + NationalOrTribalOrigin, + CountOfChildren, + CountOfMarriages, + Occupation, + Possessions, + ReligiousAffiliation, + ResidesAt, + SocialSecurityNumber, + NobilityTypeTitle, + Fact, +} + +impl ToString for IndividualAttribute { + fn to_string(&self) -> String { + format!("{:?}", self) + } +} + +/// AttributeDetail indicates other attributes or facts are used to describe an individual's +/// actions, physical description, employment, education, places of residence, etc. GEDCOM 5.x +/// allows them to be recorded in the same way as events. The attribute definition allows a value +/// on the same line as the attribute tag. In addition, it allows a subordinate date period, place +/// and/or address, etc. to be transmitted, just as the events are. Previous versions, which +/// handled just a tag and value, can be read as usual by handling the subordinate attribute detail +/// as an exception. . See GEDCOM 5.5 spec, page 69. 
+/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 DSCR Physical description\n\ +/// 2 DATE 31 DEC 1997\n\ +/// 2 PLAC The place\n\ +/// 2 SOUR @SOURCE1@\n\ +/// 3 PAGE 42\n\ +/// 3 DATA\n\ +/// 4 DATE 31 DEC 1900\n\ +/// 4 TEXT a sample text\n\ +/// 5 CONT Sample text continued here. The word TE\n\ +/// 5 CONC ST should not be broken!\n\ +/// 3 QUAY 3\n\ +/// 3 NOTE A note\n\ +/// 4 CONT Note continued here. The word TE\n\ +/// 4 CONC ST should not be broken!\n\ +/// 2 NOTE PHY_DESCRIPTION event note (the physical characteristics of a person, place, or thing)\n\ +/// 3 CONT Note continued here. The word TE\n\ +/// 3 CONC ST should not be broken!\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// assert_eq!(data.individuals.len(), 1); +/// +/// let attr = &data.individuals[0].attributes[0]; +/// assert_eq!(attr.attribute.to_string(), "PhysicalDescription"); +/// assert_eq!(attr.value.as_ref().unwrap(), "Physical description"); +/// assert_eq!(attr.date.as_ref().unwrap().value.as_ref().unwrap(), "31 DEC 1997"); +/// assert_eq!(attr.place.as_ref().unwrap(), "The place"); +/// +/// let a_sour = &data.individuals[0].attributes[0].sources[0]; +/// assert_eq!(a_sour.page.as_ref().unwrap(), "42"); +/// assert_eq!(a_sour.data.as_ref().unwrap().date.as_ref().unwrap().value.as_ref().unwrap(), "31 DEC 1900"); +/// assert_eq!(a_sour.data.as_ref().unwrap().text.as_ref().unwrap().value.as_ref().unwrap(), "a sample text\nSample text continued here. The word TEST should not be broken!"); +/// assert_eq!(a_sour.certainty_assessment.as_ref().unwrap().to_string(), "Direct"); +/// assert_eq!(a_sour.note.as_ref().unwrap().value.as_ref().unwrap(), "A note\nNote continued here. 
The word TEST should not be broken!"); +/// ``` +#[derive(Clone, Debug)] +#[cfg_attr(feature = "json", derive(Serialize, Deserialize))] +pub struct AttributeDetail { + pub attribute: IndividualAttribute, + pub value: Option, + pub place: Option, + pub date: Option, + pub sources: Vec, + pub note: Option, + /// attribute_type handles the TYPE tag, a descriptive word or phrase used to further classify the + /// parent event or attribute tag. This should be used to define what kind of identification + /// number or fact classification is being defined. + pub attribute_type: Option, +} + +impl AttributeDetail { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> AttributeDetail { + let mut attribute = AttributeDetail { + attribute: Self::from_tag(tag), + place: None, + value: None, + date: None, + sources: Vec::new(), + note: None, + attribute_type: None, + }; + attribute.parse(tokenizer, level); + attribute + } + + pub fn from_tag(tag: &str) -> IndividualAttribute { + match tag { + "CAST" => IndividualAttribute::CastName, + "DSCR" => IndividualAttribute::PhysicalDescription, + "EDUC" => IndividualAttribute::ScholasticAchievement, + "IDNO" => IndividualAttribute::NationalIDNumber, + "NATI" => IndividualAttribute::NationalOrTribalOrigin, + "NCHI" => IndividualAttribute::CountOfChildren, + "NMR" => IndividualAttribute::CountOfMarriages, + "OCCU" => IndividualAttribute::Occupation, + "PROP" => IndividualAttribute::Possessions, + "RELI" => IndividualAttribute::ReligiousAffiliation, + "RESI" => IndividualAttribute::ResidesAt, + "SSN" => IndividualAttribute::SocialSecurityNumber, + "TITL" => IndividualAttribute::NobilityTypeTitle, + "FACT" => IndividualAttribute::Fact, + _ => panic!("Unrecognized IndividualAttribute tag: {}", tag), + } + } + + pub fn add_source_citation(&mut self, sour: SourceCitation) { + self.sources.push(sour); + } +} + +impl Parser for AttributeDetail { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + 
tokenizer.next_token(); + + let mut value = String::new(); + + if let Token::LineValue(val) = &tokenizer.current_token { + value.push_str(&val); + tokenizer.next_token(); + } + + loop { + if let Token::Level(cur_level) = &tokenizer.current_token { + if cur_level <= &level { + break; + } + } + // tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "TYPE" => self.attribute_type = Some(tokenizer.take_continued_text(level + 1)), + _ => panic!( + "{}, Unhandled AttributeDetail tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{}, Unhandled AttributeDetail token: {:?}", + tokenizer.debug(), + tokenizer.current_token + ), + } + } + + if &value != "" { + self.value = Some(value); + } + } +} diff --git a/src/types/source.rs b/src/types/source.rs index 38b6173..d061036 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,7 +1,7 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, types::{Date, EventDetail, Note, RepoCitation, UserDefinedData, Xref}, + Parser, }; #[cfg(feature = "json")] @@ -400,13 +400,19 @@ impl CertaintyAssessment { } pub fn get_int(&self) -> Option { - match &self { - CertaintyAssessment::Unreliable => Some(0), - CertaintyAssessment::Questionable => Some(1), - CertaintyAssessment::Secondary => Some(2), - CertaintyAssessment::Direct => Some(3), - CertaintyAssessment::None => None, - } + match &self { + CertaintyAssessment::Unreliable => Some(0), + CertaintyAssessment::Questionable => Some(1), + CertaintyAssessment::Secondary => Some(2), + CertaintyAssessment::Direct => Some(3), + CertaintyAssessment::None => None, + } + } +} + +impl ToString for CertaintyAssessment { + 
fn to_string(&self) -> String { + format!("{:?}", self) } } From 98ceb1856160680b917048fa3bfd15095d2485a5 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 28 Nov 2022 17:37:24 -0600 Subject: [PATCH 49/55] Handle NOTE, CHAN for Individual; NOTE for FamilyLink --- src/types/individual.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/types/individual.rs b/src/types/individual.rs index 37682e5..8b4789a 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,8 +1,8 @@ use crate::{ tokenizer::{Token, Tokenizer}, types::{ - event::HasEvents, Date, EventDetail, MultimediaRecord, Note, - SourceCitation, UserDefinedData, Xref, + event::HasEvents, ChangeDate, Date, EventDetail, MultimediaRecord, Note, SourceCitation, + UserDefinedData, Xref, }, Parser, }; @@ -24,6 +24,8 @@ pub struct Individual { pub events: Vec, pub multimedia: Vec, pub last_updated: Option, + pub note: Option, + pub change_date: Option, } impl Individual { @@ -40,6 +42,8 @@ impl Individual { last_updated: None, source: Vec::new(), multimedia: Vec::new(), + change_date: None, + note: None, }; indi.parse(tokenizer, level); indi @@ -117,18 +121,14 @@ impl Parser for Individual { let tag_clone = tag.clone(); self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); } - "CHAN" => { - // assuming it always only has a single DATE subtag - tokenizer.next_token(); // level - tokenizer.next_token(); // DATE tag - self.last_updated = Some(tokenizer.take_line_value()); - } + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), "SOUR" => { self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); } "OBJE" => { self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)) } + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Individual Tag: {}", tokenizer.debug(), tag), }, Token::CustomTag(tag) => { @@ -466,6 +466,7 @@ impl Parser for FamilyLink { Token::Tag(tag) => 
match tag.as_str() { "PEDI" => self.set_pedigree(tokenizer.take_line_value().as_str()), "STAT" => self.set_child_linkage_status(&tokenizer.take_line_value().as_str()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), "ADOP" => { self.set_adopted_by_which_parent(&tokenizer.take_line_value().as_str()) } From c8146f0506b3fc11acecd03e42f7e3615d75faa0 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Wed, 30 Nov 2022 00:37:57 -0600 Subject: [PATCH 50/55] Modify Family & Source datasets to handle more tags --- src/types/date.rs | 2 +- src/types/event.rs | 146 ++++++++++++++++++++++++++++++++++++++-- src/types/family.rs | 66 ++++++++++++++++-- src/types/individual.rs | 2 +- src/types/multimedia.rs | 7 +- src/types/source.rs | 55 ++++++++++++++- 6 files changed, 259 insertions(+), 19 deletions(-) diff --git a/src/types/date.rs b/src/types/date.rs index a7047dd..d90f004 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -128,7 +128,7 @@ impl Parser for Date { /// let chan_note = chan.note.as_ref().unwrap(); /// assert_eq!(chan_note.value.as_ref().unwrap(), "A note"); /// ``` -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct ChangeDate { pub value: Option, diff --git a/src/types/event.rs b/src/types/event.rs index c916cf5..10a37a4 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -14,6 +14,7 @@ use std::{fmt, string::ToString}; pub enum Event { Adoption, AdultChristening, + Annulment, Baptism, BarMitzvah, BasMitzvah, @@ -25,12 +26,19 @@ pub enum Event { Confirmation, Cremation, Death, + Divorce, + DivorceFiled, Emigration, + Engagement, Event, FirstCommunion, Graduation, Immigration, Marriage, + MarriageBann, + MarriageContract, + MarriageLicense, + MarriageSettlement, Naturalization, Ordination, Probate, @@ -54,7 +62,7 @@ impl ToString for Event { /// over a period of time, then it is probably not an event, but rather an attribute or fact. 
The /// EVEN tag in this structure is for recording general events that are not specified in the /// specification. The event indicated by this general EVEN tag is defined by the value of the -/// subordinate TYPE tag (event_type). +/// subordinate TYPE tag (event_type). /// /// # Example /// @@ -98,7 +106,8 @@ pub struct EventDetail { pub date: Option, pub place: Option, pub note: Option, - pub child_to_family_link: Option, + pub family_link: Option, + pub family_event_details: Vec, /// event_type handles the TYPE tag, a descriptive word or phrase used to further classify the /// parent event or attribute tag. This should be used whenever either of the generic EVEN or /// FACT tags are used. T. See GEDCOM 5.5 spec, page 35 and 49. @@ -115,7 +124,8 @@ impl EventDetail { date: None, place: None, note: None, - child_to_family_link: None, + family_link: None, + family_event_details: Vec::new(), event_type: None, citations: Vec::new(), }; @@ -131,6 +141,7 @@ impl EventDetail { pub fn from_tag(tag: &str) -> Event { match tag { "ADOP" => Event::Adoption, + "ANUL" => Event::Annulment, "BAPM" => Event::Baptism, "BARM" => Event::BarMitzvah, "BASM" => Event::BasMitzvah, @@ -143,19 +154,26 @@ impl EventDetail { "CONF" => Event::Confirmation, "CREM" => Event::Cremation, "DEAT" => Event::Death, + "DIV" => Event::Divorce, + "DIVF" => Event::DivorceFiled, "EMIG" => Event::Emigration, + "ENGA" => Event::Engagement, "EVEN" => Event::Event, "FCOM" => Event::FirstCommunion, "GRAD" => Event::Graduation, "IMMI" => Event::Immigration, + "MARB" => Event::MarriageBann, + "MARC" => Event::MarriageContract, + "MARL" => Event::MarriageLicense, "MARR" => Event::Marriage, + "MARS" => Event::MarriageSettlement, "NATU" => Event::Naturalization, "ORDN" => Event::Ordination, + "OTHER" => Event::Other, "PROB" => Event::Probate, "RESI" => Event::Residence, "RETI" => Event::Retired, "WILL" => Event::Will, - "OTHER" => Event::Other, _ => panic!("Unrecognized EventType tag: {}", tag), } } @@ -164,6 
+182,10 @@ impl EventDetail { self.citations.push(citation) } + pub fn add_family_event_detail(&mut self, detail: FamilyEventDetail) { + self.family_event_details.push(detail); + } + #[must_use] pub fn get_citations(&self) -> Vec { self.citations.clone() @@ -232,9 +254,13 @@ impl Parser for EventDetail { "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), "FAMC" => { let tag_clone = tag.clone(); - self.child_to_family_link = + self.family_link = Some(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())) } + "HUSB" | "WIFE" => { + let tag_clone = tag.clone(); + self.add_family_event_detail(FamilyEventDetail::new(tokenizer, level + 1, tag_clone.as_str())); + } "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), "TYPE" => self.event_type = Some(tokenizer.take_line_value()), _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), @@ -249,3 +275,113 @@ impl Parser for EventDetail { } } } + +/// Spouse in a family that experiences an event. +#[derive(Clone, Debug)] +pub enum Spouse { + Spouse1, + Spouse2, +} + +impl ToString for Spouse { + fn to_string(&self) -> String { + format!("{:?}", self) + } +} + +/// FamilyEventDetail defines an additional dataset found in certain events. +/// +/// # Example +/// +/// ```rust +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @FAMILY1@ FAM +/// 1 ANUL +/// 2 DATE 31 DEC 1997 +/// 2 PLAC The place +/// 2 SOUR @SOURCE1@ +/// 3 PAGE 42 +/// 3 DATA +/// 4 DATE 31 DEC 1900 +/// 4 TEXT a sample text +/// 5 CONT Sample text continued here. The word TE +/// 5 CONC ST should not be broken! +/// 3 QUAY 3 +/// 3 NOTE A note +/// 4 CONT Note continued here. The word TE +/// 4 CONC ST should not be broken! +/// 2 NOTE ANNULMENT event note (declaring a marriage void from the beginning (never existed)) +/// 3 CONT Note continued here. The word TE +/// 3 CONC ST should not be broken! 
+/// 2 HUSB +/// 3 AGE 42y +/// 2 WIFE +/// 3 AGE 42y 6m +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let anul = &data.families[0].events; +/// assert_eq!(anul.len(), 1); +/// +/// ``` +#[derive(Clone)] +pub struct FamilyEventDetail { + pub member: Spouse, + pub age: Option, +} + +impl FamilyEventDetail { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> FamilyEventDetail { + let mut fe = FamilyEventDetail { + member: Self::from_tag(tag), + age: None, + }; + fe.parse(tokenizer, level); + fe + } + + pub fn from_tag(tag: &str) -> Spouse { + match tag { + "HUSB" => Spouse::Spouse1, + "WIFE" => Spouse::Spouse2, + _ => panic!("{:?}, Unrecognized FamilyEventMember", tag), + } + } +} + +impl Parser for FamilyEventDetail { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + tokenizer.next_token(); + loop { + if let Token::Level(cur_level) = tokenizer.current_token { + if cur_level <= level { + break; + } + } + tokenizer.next_token(); + match &tokenizer.current_token { + Token::Tag(tag) => match tag.as_str() { + "AGE" => self.age = Some(tokenizer.take_line_value()), + _ => panic!( + "{}, Unrecognized FamilyEventDetail tag: {}", + tokenizer.debug(), + tag + ), + }, + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{} Unrecognized FamilyEventDetail: {:?}", + tokenizer.debug(), + tokenizer.current_token + ), + } + } + } +} diff --git a/src/types/family.rs b/src/types/family.rs index 1921950..81f6400 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,14 +1,15 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, - types::{event::HasEvents, EventDetail}, + types::{ + event::HasEvents, ChangeDate, EventDetail, MultimediaRecord, Note, SourceCitation, + UserDefinedData, Xref, + }, + Parser, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -type Xref = String; - /// Family fact, representing a relationship between 
`Individual`s /// /// This data representation understands that HUSB & WIFE are just poorly-named @@ -19,9 +20,15 @@ pub struct Family { pub xref: Option, pub individual1: Option, // mapped from HUSB pub individual2: Option, // mapped from WIFE + pub family_event: Vec, pub children: Vec, - pub num_children: Option, - events: Vec, + pub num_children: Option, + pub change_date: Option, + pub events: Vec, + pub sources: Vec, + pub multimedia: Vec, + pub custom_data: Vec, + pub notes: Vec, } impl Family { @@ -31,6 +38,10 @@ impl Family { fam.xref = xref; fam.children = Vec::new(); fam.events = Vec::new(); + fam.sources = Vec::new(); + fam.multimedia = Vec::new(); + fam.notes = Vec::new(); + fam.custom_data = Vec::new(); fam.parse(tokenizer, level); fam } @@ -52,6 +63,26 @@ impl Family { pub fn add_child(&mut self, xref: Xref) { self.children.push(xref); } + + pub fn add_event(&mut self, family_event: EventDetail) { + self.events.push(family_event); + } + + pub fn add_source(&mut self, sour: SourceCitation) { + self.sources.push(sour); + } + + pub fn add_multimedia(&mut self, media: MultimediaRecord) { + self.multimedia.push(media); + } + + pub fn add_note(&mut self, note: Note) { + self.notes.push(note); + } + + pub fn add_custom_data(&mut self, custom: UserDefinedData) { + self.custom_data.push(custom); + } } impl Parser for Family { @@ -67,14 +98,35 @@ impl Parser for Family { } } + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { - "MARR" => self.add_event(EventDetail::new(tokenizer, level + 1, "MARR")), + "MARR" | "ANUL" | "CENS" | "DIV" | "DIVF" | "ENGA" | "MARB" | "MARC" + | "MARL" | "MARS" | "RESI" | "EVEN" => { + let tag_clone = tag.clone(); + self.add_event(EventDetail::new(tokenizer, level + 1, tag_clone.as_str())); + } "HUSB" => self.set_individual1(tokenizer.take_line_value()), 
"WIFE" => self.set_individual2(tokenizer.take_line_value()), "CHIL" => self.add_child(tokenizer.take_line_value()), + "NCHI" => self.num_children = Some(tokenizer.take_line_value()), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "SOUR" => self.add_source(SourceCitation::new(tokenizer, level + 1)), + "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), + "OBJE" => { + self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)) + } _ => panic!("{} Unhandled Family Tag: {}", tokenizer.debug(), tag), }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Family Token: {:?}", tokenizer.current_token), } diff --git a/src/types/individual.rs b/src/types/individual.rs index 8b4789a..5b7d7c0 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -380,7 +380,7 @@ impl ToString for AdoptedByWhichParent { /// let mut doc = GedcomDocument::new(sample.chars()); /// let data = doc.parse_document(); /// -/// let famc = data.individuals[0].events[0].child_to_family_link.as_ref().unwrap(); +/// let famc = data.individuals[0].events[0].family_link.as_ref().unwrap(); /// assert_eq!(famc.xref, "@ADOPTIVE_PARENTS@"); /// assert_eq!(famc.family_link_type.to_string(), "Child"); /// assert_eq!(famc.pedigree_linkage_type.as_ref().unwrap().to_string(), "Adopted"); diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 51a9ccd..12666a4 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -44,7 +44,7 @@ use crate::{ /// let rin = obje.automated_record_id.as_ref().unwrap(); /// assert_eq!(rin, "Automated Id"); /// ``` -#[derive(Debug)] +#[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaRecord { /// Optional reference to link to this submitter @@ -240,7 +240,7 @@ impl Parser for MultimediaLink { /// 
assert_eq!(form.value.as_ref().unwrap(), "bmp"); /// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); /// ``` -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaFileRefn { pub value: Option, @@ -318,7 +318,7 @@ impl Parser for MultimediaFileRefn { /// assert_eq!(form.value.as_ref().unwrap(), "bmp"); /// assert_eq!(form.source_media_type.as_ref().unwrap(), "photo"); /// ``` -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaFormat { pub value: Option, @@ -395,6 +395,7 @@ impl Parser for MultimediaFormat { /// "User Reference Type" /// ); /// ``` +#[derive(Clone)] pub struct UserReferenceNumber { /// line value pub value: Option, diff --git a/src/types/source.rs b/src/types/source.rs index d061036..8c9cdb7 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,6 +1,8 @@ use crate::{ tokenizer::{Token, Tokenizer}, - types::{Date, EventDetail, Note, RepoCitation, UserDefinedData, Xref}, + types::{ + ChangeDate, Date, EventDetail, MultimediaRecord, Note, RepoCitation, UserDefinedData, Xref, + }, Parser, }; @@ -15,7 +17,14 @@ pub struct Source { pub data: SourceData, pub abbreviation: Option, pub title: Option, - repo_citations: Vec, + pub author: Option, + pub publication_facts: Option, + pub citation_from_source: Option, + pub date: Option>, + pub multimedia: Vec, + pub notes: Vec, + pub repo_citations: Vec, + pub custom_data: Vec, } impl Source { @@ -29,12 +38,31 @@ impl Source { }, abbreviation: None, title: None, + date: None, + author: None, + publication_facts: None, + citation_from_source: None, + multimedia: Vec::new(), + notes: Vec::new(), repo_citations: Vec::new(), + custom_data: Vec::new(), }; sour.parse(tokenizer, level); sour } + pub fn add_custom_data(&mut self, data: UserDefinedData) { + self.custom_data.push(data); + } + + pub fn 
add_multimedia(&mut self, media: MultimediaRecord) { + self.multimedia.push(media); + } + + pub fn add_note(&mut self, note: Note) { + self.notes.push(note); + } + pub fn add_repo_citation(&mut self, citation: RepoCitation) { self.repo_citations.push(citation); } @@ -51,6 +79,13 @@ impl Parser for Source { break; } } + + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + match &tokenizer.current_token { Token::Tag(tag) => match tag.as_str() { "DATA" => tokenizer.next_token(), @@ -62,10 +97,26 @@ impl Parser for Source { } "AGNC" => self.data.agency = Some(tokenizer.take_line_value()), "ABBR" => self.abbreviation = Some(tokenizer.take_continued_text(level + 1)), + "CHAN" => self.date = Some(Box::new(ChangeDate::new(tokenizer, level + 1))), "TITL" => self.title = Some(tokenizer.take_continued_text(level + 1)), + "AUTH" => self.author = Some(tokenizer.take_continued_text(level + 1)), + "PUBL" => { + self.publication_facts = Some(tokenizer.take_continued_text(level + 1)) + } + "TEXT" => { + self.citation_from_source = Some(tokenizer.take_continued_text(level + 1)) + } + "OBJE" => { + self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)) + } + "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), _ => panic!("{} Unhandled Source Tag: {}", tokenizer.debug(), tag), }, + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + } Token::Level(_) => tokenizer.next_token(), _ => panic!("Unhandled Source Token: {:?}", tokenizer.current_token), } From 872806b8ea0a6e1bf1f211485cae9f699b3e8a0a Mon Sep 17 00:00:00 2001 From: ge3224 Date: Wed, 30 Nov 2022 00:48:26 -0600 Subject: [PATCH 51/55] Modify README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 
19ece88..0c749b2 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ gedcom = { version = "", features = ["json"] } ## 🚧 Progress 🚧 -There are still parts of the specification not yet implemented, and the project is subject to change. The way development has been happening is by taking a GEDCOM file, attempting to parse it and acting on whatever errors or omissions occur. In its current state, it is capable of parsing the [sample.ged](tests/fixtures/sample.ged) in its entirety. +There are still parts of the specification not yet implemented, and the project is subject to change. The way development has been happening is by taking a GEDCOM file, attempting to parse it and acting on whatever errors or omissions occur. In its current state, it is capable of parsing [Heiner Eichmann's](http://heiner-eichmann.de/gedcom/allged.htm) [`allged.ged`](tests/fixtures/allged.ged) in its entirety. Here are some notes about parsed data & tags. Page references are to the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf). 
From f6a10de874eae9040b0f3bf5583d0b449f0c2ba2 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Thu, 1 Dec 2022 15:10:13 -0600 Subject: [PATCH 52/55] Refactor Parser implementations --- src/lib.rs | 70 +++++++++--- src/tokenizer.rs | 16 +-- src/types/address.rs | 43 +++----- src/types/corporation.rs | 37 ++----- src/types/date.rs | 52 +++------ src/types/event.rs | 77 ++++--------- src/types/family.rs | 54 +++------ src/types/header.rs | 221 ++++++++++++------------------------- src/types/individual.rs | 231 ++++++++++++--------------------------- src/types/multimedia.rs | 179 ++++++++++-------------------- src/types/note.rs | 28 ++--- src/types/repository.rs | 65 ++++------- src/types/source.rs | 218 +++++++++++------------------------- src/types/submission.rs | 85 +++++--------- src/types/submitter.rs | 44 +++----- src/types/translation.rs | 29 ++--- 16 files changed, 490 insertions(+), 959 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3540850..8d4d318 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,7 @@ use tokenizer::{Token, Tokenizer}; pub mod types; use types::{ - Family, Header, Individual, MultimediaRecord, Repository, Source, SubmissionRecord, Submitter, + Family, Header, Individual, MultimediaRecord, Repository, Source, Submission, Submitter, UserDefinedData, }; @@ -81,6 +81,57 @@ pub trait Parser { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8); } +#[must_use] +/// Helper function for converting GEDCOM file content stream to parsed data. +pub fn parse_ged(content: std::str::Chars) -> GedcomData { + let mut p = GedcomDocument::new(content); + p.parse_document() +} + +/// parse_subset is a helper function that handles some boilerplate code involved in implementing +/// the Parser trait. It returns a Vector of any UserDefinedData. 
+pub fn parse_subset( + tokenizer: &mut Tokenizer, + level: u8, + mut tag_handler: F, +) -> Vec +where + F: FnMut(&str, &mut Tokenizer), +{ + let mut custom_data = Vec::new(); + loop { + if let Token::Level(curl_level) = tokenizer.current_token { + if curl_level <= level { + break; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => { + let tag_clone = tag.clone(); + tag_handler(tag_clone.as_str(), tokenizer); + } + Token::CustomTag(tag) => { + let tag_clone = tag.clone(); + custom_data.push(parse_custom_tag(tokenizer, tag_clone)); + } + Token::Level(_) => tokenizer.next_token(), + _ => panic!( + "{}, Unhandled Token: {:?}", + tokenizer.debug(), + tokenizer.current_token + ), + } + } + custom_data +} + +/// parse_custom_tag handles User Defined Data. See Gedcom 5.5 spec, p.56 +pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> UserDefinedData { + let value = tokenizer.take_line_value(); + UserDefinedData { tag, value } +} + /// GedcomData is the data structure representing all the data within a gedcom file /// /// # Example @@ -129,8 +180,8 @@ pub struct GedcomData { pub header: Option
, /// List of submitters of the facts pub submitters: Vec, - /// List of submission records - pub submissions: Vec, + /// List of submission records + pub submissions: Vec, /// Individuals within the family tree pub individuals: Vec, /// The family units of the tree, representing relationships between individuals @@ -179,7 +230,7 @@ impl GedcomData { } /// Add a `Submission` to the tree - pub fn add_submission(&mut self, submission: SubmissionRecord) { + pub fn add_submission(&mut self, submission: Submission) { self.submissions.push(submission); } @@ -246,7 +297,7 @@ impl Parser for GedcomData { self.add_repository(Repository::new(tokenizer, current_level, pointer)) } "SOUR" => self.add_source(Source::new(tokenizer, current_level, pointer)), - "SUBN" => self.add_submission(SubmissionRecord::new(tokenizer, level, pointer)), + "SUBN" => self.add_submission(Submission::new(tokenizer, level, pointer)), "SUBM" => self.add_submitter(Submitter::new(tokenizer, level, pointer)), "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level, pointer)), "TRLR" => break, @@ -257,7 +308,7 @@ impl Parser for GedcomData { }; } else if let Token::CustomTag(tag) = &tokenizer.current_token { let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); while tokenizer.current_token != Token::Level(level) { tokenizer.next_token(); } @@ -272,10 +323,3 @@ impl Parser for GedcomData { } } } - -#[must_use] -/// Helper function for converting GEDCOM file content stream to parsed data. -pub fn parse_ged(content: std::str::Chars) -> GedcomData { - let mut p = GedcomDocument::new(content); - p.parse_document() -} diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 115a643..32cf8be 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,9 +1,7 @@ //! 
Handles the tokenization of a GEDCOM file use std::str::Chars; -use crate::types::UserDefinedData; - -/// The base enum of Token types making use of +/// The base enum of Token types making use of /// [GEDCOM Standard Release 5.5.1](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf), /// p.11 `gedcom_line: level + delim + [optional_xref_ID] + tag + [optional_line_value] + /// terminator` @@ -202,15 +200,13 @@ impl<'a> Tokenizer<'a> { _ => panic!("{} Unhandled Continuation Tag: {}", self.debug(), tag), }, Token::Level(_) => self.next_token(), - _ => panic!("{} Unhandled Continuation Token: {:?}", self.debug(), self.current_token), + _ => panic!( + "{} Unhandled Continuation Token: {:?}", + self.debug(), + self.current_token + ), } } value } - - /// parse_custom_tag handles User Defined Data. See Gedcom 5.5 spec, p.56 - pub fn parse_custom_tag(&mut self, tag: String) -> UserDefinedData { - let value = self.take_line_value(); - UserDefinedData { tag, value } - } } diff --git a/src/types/address.rs b/src/types/address.rs index 23e0e58..b5f11fc 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -3,8 +3,10 @@ use serde::{Deserialize, Serialize}; use std::fmt; use crate::{ - Parser, + parse_subset, tokenizer::{Token, Tokenizer}, + types::UserDefinedData, + Parser, }; /// Physical address at which a fact occurs @@ -19,6 +21,7 @@ pub struct Address { pub state: Option, pub post: Option, pub country: Option, + pub custom_data: Vec, } impl Address { @@ -44,31 +47,21 @@ impl Parser for Address { tokenizer.next_token(); } - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "CONT" | "CONC" => { + value.push('\n'); + value.push_str(&tokenizer.take_line_value()); } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONT" | "CONC" => { - value.push('\n'); - 
value.push_str(&tokenizer.take_line_value()); - } - "ADR1" => self.adr1 = Some(tokenizer.take_line_value()), - "ADR2" => self.adr2 = Some(tokenizer.take_line_value()), - "ADR3" => self.adr3 = Some(tokenizer.take_line_value()), - "CITY" => self.city = Some(tokenizer.take_line_value()), - "STAE" => self.state = Some(tokenizer.take_line_value()), - "POST" => self.post = Some(tokenizer.take_line_value()), - "CTRY" => self.country = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Address Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Address Token: {:?}", tokenizer.current_token), - } - } + "ADR1" => self.adr1 = Some(tokenizer.take_line_value()), + "ADR2" => self.adr2 = Some(tokenizer.take_line_value()), + "ADR3" => self.adr3 = Some(tokenizer.take_line_value()), + "CITY" => self.city = Some(tokenizer.take_line_value()), + "STAE" => self.state = Some(tokenizer.take_line_value()), + "POST" => self.post = Some(tokenizer.take_line_value()), + "CTRY" => self.country = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Address Tag: {}", tokenizer.debug(), tag), + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); if &value != "" { self.value = Some(value); diff --git a/src/types/corporation.rs b/src/types/corporation.rs index 28d67eb..8f1d84d 100644 --- a/src/types/corporation.rs +++ b/src/types/corporation.rs @@ -1,7 +1,8 @@ use crate::{ - Parser, - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::Address, + Parser, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -24,7 +25,6 @@ pub struct Corporation { pub website: Option, } - impl Corporation { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8) -> Corporation { @@ -39,27 +39,14 @@ impl Parser for Corporation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) 
= tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), - "PHON" => self.phone = Some(tokenizer.take_line_value()), - "EMAIL" => self.email = Some(tokenizer.take_line_value()), - "FAX" => self.fax = Some(tokenizer.take_line_value()), - "WWW" => self.website = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled CORP tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled CORP tag in header: {:?}", - tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + "PHON" => self.phone = Some(tokenizer.take_line_value()), + "EMAIL" => self.email = Some(tokenizer.take_line_value()), + "FAX" => self.fax = Some(tokenizer.take_line_value()), + "WWW" => self.website = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled CORP tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/date.rs b/src/types/date.rs index d90f004..6ff729f 100644 --- a/src/types/date.rs +++ b/src/types/date.rs @@ -1,13 +1,13 @@ use crate::{ - Parser, - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::Note, + Parser, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; - /// Date encompasses a number of date formats, e.g. approximated, period, phrase and range. 
/// /// # Example @@ -76,21 +76,11 @@ impl Parser for Date { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "TIME" => self.time = Some(tokenizer.take_line_value()), - _ => panic!("{} unhandled DATE tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unexpected DATE token: {:?}", tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "TIME" => self.time = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled DATE tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -108,7 +98,7 @@ impl Parser for Date { /// 2 FORM LINEAGE-LINKED\n\ /// 0 @MEDIA1@ OBJE\n\ /// 1 FILE /home/user/media/file_name.bmp\n\ -/// 1 CHAN +/// 1 CHAN /// 2 DATE 1 APR 1998 /// 3 TIME 12:34:56.789 /// 2 NOTE A note @@ -131,7 +121,6 @@ impl Parser for Date { #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct ChangeDate { - pub value: Option, pub date: Option, pub note: Option, } @@ -149,22 +138,11 @@ impl Parser for ChangeDate { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - _ => panic!("{} unhandled ChangeDate tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unexpected ChangeDate token: {:?}", tokenizer.current_token), - } 
- } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + _ => panic!("{} unhandled ChangeDate tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/event.rs b/src/types/event.rs index 10a37a4..e4eacc0 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,4 +1,5 @@ use crate::{ + parse_subset, tokenizer::{Token, Tokenizer}, types::{Date, FamilyLink, Note, SourceCitation}, Parser, @@ -183,7 +184,7 @@ impl EventDetail { } pub fn add_family_event_detail(&mut self, detail: FamilyEventDetail) { - self.family_event_details.push(detail); + self.family_event_details.push(detail); } #[must_use] @@ -240,35 +241,19 @@ impl Parser for EventDetail { tokenizer.next_token(); } - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(tokenizer.take_line_value()), + "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), + "FAMC" => self.family_link = Some(FamilyLink::new(tokenizer, level + 1, tag)), + "HUSB" | "WIFE" => { + self.add_family_event_detail(FamilyEventDetail::new(tokenizer, level + 1, tag)); } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "PLAC" => self.place = Some(tokenizer.take_line_value()), - "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), - "FAMC" => { - let tag_clone = tag.clone(); - self.family_link = - Some(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())) - } - "HUSB" | "WIFE" => { - let tag_clone = tag.clone(); - self.add_family_event_detail(FamilyEventDetail::new(tokenizer, level + 
1, tag_clone.as_str())); - } - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "TYPE" => self.event_type = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Event Token: {:?}", tokenizer.current_token), - } - } + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "TYPE" => self.event_type = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); if &value != "" { self.value = Some(value); @@ -359,29 +344,15 @@ impl FamilyEventDetail { impl Parser for FamilyEventDetail { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "AGE" => self.age = Some(tokenizer.take_line_value()), - _ => panic!( - "{}, Unrecognized FamilyEventDetail tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{} Unrecognized FamilyEventDetail: {:?}", - tokenizer.debug(), - tokenizer.current_token - ), - } - } + + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "AGE" => self.age = Some(tokenizer.take_line_value()), + _ => panic!( + "{}, Unrecognized FamilyEventDetail tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/family.rs b/src/types/family.rs index 81f6400..e48e194 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -1,4 +1,5 @@ use crate::{ + parse_subset, tokenizer::{Token, Tokenizer}, types::{ event::HasEvents, ChangeDate, EventDetail, MultimediaRecord, Note, SourceCitation, @@ -79,10 +80,6 @@ impl Family { pub fn add_note(&mut 
self, note: Note) { self.notes.push(note); } - - pub fn add_custom_data(&mut self, custom: UserDefinedData) { - self.custom_data.push(custom); - } } impl Parser for Family { @@ -91,46 +88,31 @@ impl Parser for Family { // skip over FAM tag name tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| { let mut pointer: Option = None; if let Token::Pointer(xref) = &tokenizer.current_token { pointer = Some(xref.to_string()); tokenizer.next_token(); } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MARR" | "ANUL" | "CENS" | "DIV" | "DIVF" | "ENGA" | "MARB" | "MARC" - | "MARL" | "MARS" | "RESI" | "EVEN" => { - let tag_clone = tag.clone(); - self.add_event(EventDetail::new(tokenizer, level + 1, tag_clone.as_str())); - } - "HUSB" => self.set_individual1(tokenizer.take_line_value()), - "WIFE" => self.set_individual2(tokenizer.take_line_value()), - "CHIL" => self.add_child(tokenizer.take_line_value()), - "NCHI" => self.num_children = Some(tokenizer.take_line_value()), - "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - "SOUR" => self.add_source(SourceCitation::new(tokenizer, level + 1)), - "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), - "OBJE" => { - self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)) - } - _ => panic!("{} Unhandled Family Tag: {}", tokenizer.debug(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + match tag { + "MARR" | "ANUL" | "CENS" | "DIV" | "DIVF" | "ENGA" | "MARB" | "MARC" | "MARL" + | "MARS" | "RESI" | "EVEN" => { + self.add_event(EventDetail::new(tokenizer, level + 1, tag)); } - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Family Token: {:?}", tokenizer.current_token), + "HUSB" => 
self.set_individual1(tokenizer.take_line_value()), + "WIFE" => self.set_individual2(tokenizer.take_line_value()), + "CHIL" => self.add_child(tokenizer.take_line_value()), + "NCHI" => self.num_children = Some(tokenizer.take_line_value()), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "SOUR" => self.add_source(SourceCitation::new(tokenizer, level + 1)), + "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)), + _ => panic!("{} Unhandled Family Tag: {}", tokenizer.debug(), tag), } - } + }; + + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/header.rs b/src/types/header.rs index 5d81c2d..2f90872 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -1,7 +1,8 @@ use crate::{ - Parser, - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::{Corporation, Date, Note}, + Parser, }; #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; @@ -87,46 +88,31 @@ impl Header { header.parse(tokenizer, level); header } - - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data) - } } impl Parser for Header { /// Parses HEAD top-level tag. 
See /// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - // let mut head = Header::default(); - // skip over HEAD tag name tokenizer.next_token(); - while tokenizer.current_token != Token::Level(level) { - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "GEDC" => self.gedcom = Some(GedcomMeta::new(tokenizer, level + 1)), - "SOUR" => self.source = Some(HeadSour::new(tokenizer, level + 1)), - "DEST" => self.destination = Some(tokenizer.take_line_value()), - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "SUBM" => self.submitter_tag = Some(tokenizer.take_line_value()), - "SUBN" => self.submission_tag = Some(tokenizer.take_line_value()), - "FILE" => self.filename = Some(tokenizer.take_line_value()), - "COPR" => self.copyright = Some(tokenizer.take_continued_text(level + 1)), - "CHAR" => self.encoding = Some(Encoding::new(tokenizer, level + 1)), - "LANG" => self.language = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "PLAC" => self.place = Some(HeadPlac::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Header Tag: {}", tokenizer.debug(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) - } - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Header Token: {:?}", &tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "GEDC" => self.gedcom = Some(GedcomMeta::new(tokenizer, level + 1)), + "SOUR" => self.source = Some(HeadSour::new(tokenizer, level + 1)), + "DEST" => self.destination = Some(tokenizer.take_line_value()), + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "SUBM" => self.submitter_tag = Some(tokenizer.take_line_value()), + "SUBN" => self.submission_tag = Some(tokenizer.take_line_value()), + "FILE" => 
self.filename = Some(tokenizer.take_line_value()), + "COPR" => self.copyright = Some(tokenizer.take_continued_text(level + 1)), + "CHAR" => self.encoding = Some(Encoding::new(tokenizer, level + 1)), + "LANG" => self.language = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(HeadPlac::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Header Tag: {}", tokenizer.debug(), tag), + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } @@ -176,36 +162,22 @@ impl Parser for GedcomMeta { // skip GEDC tag tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "VERS" => self.version = Some(tokenizer.take_line_value()), + // this is the only value that makes sense. warn them otherwise. + "FORM" => { + let form = tokenizer.take_line_value(); + if &form.to_uppercase() != "LINEAGE-LINKED" { + println!( + "WARNING: Unrecognized GEDCOM form. Expected LINEAGE-LINKED, found {}", + form + ); } + self.form = Some(form); } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(tokenizer.take_line_value()), - // this is the only value that makes sense. warn them otherwise. - "FORM" => { - let form = tokenizer.take_line_value(); - if &form.to_uppercase() != "LINEAGE-LINKED" { - println!( - "WARNING: Unrecognized GEDCOM form. 
Expected LINEAGE-LINKED, found {}" - , form); - } - self.form = Some(form); - } - _ => panic!("{} Unhandled GEDC Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{} Unexpected GEDC Token: {:?}", - tokenizer.debug(), - &tokenizer.current_token - ), - } - } + _ => panic!("{} Unhandled GEDC Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -256,25 +228,11 @@ impl Parser for Encoding { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled CHAR Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{} Unexpected CHAR Token: {:?}", - tokenizer.debug(), - &tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "VERS" => self.version = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled CHAR Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -336,24 +294,14 @@ impl Parser for HeadSour { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "VERS" => self.version = Some(tokenizer.take_line_value()), - "NAME" => self.name = Some(tokenizer.take_line_value()), - "CORP" => self.corporation = Some(Corporation::new(tokenizer, level + 1)), - "DATA" => self.data = Some(HeadSourData::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled CHAR Tag: {}", 
tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unexpected SOUR Token: {:?}", tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "VERS" => self.version = Some(tokenizer.take_line_value()), + "NAME" => self.name = Some(tokenizer.take_line_value()), + "CORP" => self.corporation = Some(Corporation::new(tokenizer, level + 1)), + "DATA" => self.data = Some(HeadSourData::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled CHAR Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -413,29 +361,16 @@ impl Parser for HeadSourData { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "COPR" => self.copyright = Some(tokenizer.take_continued_text(level+1)), - _ => panic!( - "{} unhandled DATA tag in header: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled SOUR tag in header: {:?}", - tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "COPR" => self.copyright = Some(tokenizer.take_continued_text(level + 1)), + _ => panic!( + "{} unhandled DATA tag in header: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -504,35 +439,23 @@ impl Parser for HeadPlac { // In the header, PLAC should have no payload. 
See // https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEAD-PLAC tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "FORM" => { - let form = tokenizer.take_line_value(); - let jurisdictional_titles = form.split(","); - for t in jurisdictional_titles { - let v = t.trim(); - self.push_jurisdictional_title(v.to_string()); - } - } - _ => panic!( - "{} Unhandled PLAC tag in header: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled PLAC tag in header: {:?}", - tokenizer.current_token - ), + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "FORM" => { + let form = tokenizer.take_line_value(); + let jurisdictional_titles = form.split(","); + + for t in jurisdictional_titles { + let v = t.trim(); + self.push_jurisdictional_title(v.to_string()); + } } - } + _ => panic!( + "{} Unhandled PLAC tag in header: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/individual.rs b/src/types/individual.rs index 5b7d7c0..8f21f22 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -1,4 +1,5 @@ use crate::{ + parse_subset, tokenizer::{Token, Tokenizer}, types::{ event::HasEvents, ChangeDate, Date, EventDetail, MultimediaRecord, Note, SourceCitation, @@ -11,7 +12,7 @@ use crate::{ use serde::{Deserialize, Serialize}; /// A Person within the family tree -#[derive(Debug)] +#[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Individual { pub xref: Option, @@ -31,20 +32,8 @@ pub struct Individual { impl Individual { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Individual { - let mut indi = Individual { - xref, - name: None, - sex: None, - events: Vec::new(), - families: 
Vec::new(), - attributes: Vec::new(), - custom_data: Vec::new(), - last_updated: None, - source: Vec::new(), - multimedia: Vec::new(), - change_date: None, - note: None, - }; + let mut indi = Individual::default(); + indi.xref = xref; indi.parse(tokenizer, level); indi } @@ -62,10 +51,6 @@ impl Individual { } } - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data) - } - pub fn add_source_citation(&mut self, sour: SourceCitation) { self.source.push(sour); } @@ -94,51 +79,33 @@ impl Parser for Individual { // skip over INDI tag name tokenizer.next_token(); - while tokenizer.current_token != Token::Level(level) { - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - // TODO handle xref - "NAME" => self.name = Some(Name::new(tokenizer, level + 1)), - "SEX" => self.sex = Some(Gender::new(tokenizer, level + 1)), - "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" - | "CHR" | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" - | "IMMI" | "NATU" | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" - | "MARR" => { - let tag_clone = tag.clone(); - self.add_event(EventDetail::new(tokenizer, level + 1, tag_clone.as_str())); - } - "CAST" | "DSCR" | "EDUC" | "IDNO" | "NATI" | "NCHI" | "NMR" | "OCCU" - | "PROP" | "RELI" | "SSN" | "TITL" | "FACT" => { - // RESI should be an attribute or an event? 
- let tag_clone = tag.clone(); - self.add_attribute(AttributeDetail::new( - tokenizer, - level + 1, - tag_clone.as_str(), - )); - } - "FAMC" | "FAMS" => { - let tag_clone = tag.clone(); - self.add_family(FamilyLink::new(tokenizer, level + 1, tag_clone.as_str())); - } - "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - "SOUR" => { - self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); - } - "OBJE" => { - self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)) - } - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Individual Tag: {}", tokenizer.debug(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) - } - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Individual Token: {:?}", tokenizer.current_token), + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + // TODO handle xref + "NAME" => self.name = Some(Name::new(tokenizer, level + 1)), + "SEX" => self.sex = Some(Gender::new(tokenizer, level + 1)), + "ADOP" | "BIRT" | "BAPM" | "BARM" | "BASM" | "BLES" | "BURI" | "CENS" | "CHR" + | "CHRA" | "CONF" | "CREM" | "DEAT" | "EMIG" | "FCOM" | "GRAD" | "IMMI" | "NATU" + | "ORDN" | "RETI" | "RESI" | "PROB" | "WILL" | "EVEN" | "MARR" => { + self.add_event(EventDetail::new(tokenizer, level + 1, tag)); } - } + "CAST" | "DSCR" | "EDUC" | "IDNO" | "NATI" | "NCHI" | "NMR" | "OCCU" | "PROP" + | "RELI" | "SSN" | "TITL" | "FACT" => { + // RESI should be an attribute or an event? 
+ self.add_attribute(AttributeDetail::new(tokenizer, level + 1, tag)); + } + "FAMC" | "FAMS" => { + self.add_family(FamilyLink::new(tokenizer, level + 1, tag)); + } + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "SOUR" => { + self.add_source_citation(SourceCitation::new(tokenizer, level + 1)); + } + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, None)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Individual Tag: {}", tokenizer.debug(), tag), + }; + + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } @@ -221,10 +188,6 @@ impl Gender { pub fn add_source_citation(&mut self, sour: SourceCitation) { self.sources.push(sour); } - - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data) - } } impl Parser for Gender { @@ -247,31 +210,12 @@ impl Parser for Gender { tokenizer.next_token(); } - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "FACT" => self.fact = Some(tokenizer.take_continued_text(level + 1)), - "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), - _ => panic!("{}, Unhandled Gender tag: {}", tokenizer.debug(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); - } - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{}, Unhandled Gender token: {:?}", - tokenizer.debug(), - tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "FACT" => self.fact = Some(tokenizer.take_continued_text(level + 1)), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + _ => panic!("{}, Unhandled Gender tag: {}", tokenizer.debug(), tag), + }; + self.custom_data = 
parse_subset(tokenizer, level, handle_subset); } } @@ -396,6 +340,7 @@ pub struct FamilyLink { pub child_linkage_status: Option, pub adopted_by: Option, pub note: Option, + pub custom_data: Vec, } impl FamilyLink { @@ -414,6 +359,7 @@ impl FamilyLink { child_linkage_status: None, adopted_by: None, note: None, + custom_data: Vec::new(), }; family_link.parse(tokenizer, level); family_link @@ -456,26 +402,14 @@ impl FamilyLink { impl Parser for FamilyLink { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PEDI" => self.set_pedigree(tokenizer.take_line_value().as_str()), - "STAT" => self.set_child_linkage_status(&tokenizer.take_line_value().as_str()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "ADOP" => { - self.set_adopted_by_which_parent(&tokenizer.take_line_value().as_str()) - } - _ => panic!("{} Unhandled FamilyLink Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled FamilyLink Token: {:?}", tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "PEDI" => self.set_pedigree(tokenizer.take_line_value().as_str()), + "STAT" => self.set_child_linkage_status(&tokenizer.take_line_value().as_str()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "ADOP" => self.set_adopted_by_which_parent(&tokenizer.take_line_value().as_str()), + _ => panic!("{} Unhandled FamilyLink Tag: {}", tokenizer.debug(), tag), + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } @@ -517,27 +451,17 @@ impl Parser for Name { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - 
break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "GIVN" => self.given = Some(tokenizer.take_line_value()), - "NPFX" => self.prefix = Some(tokenizer.take_line_value()), - "NSFX" => self.suffix = Some(tokenizer.take_line_value()), - "SPFX" => self.surname_prefix = Some(tokenizer.take_line_value()), - "SURN" => self.surname = Some(tokenizer.take_line_value()), - "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Name Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Name Token: {:?}", tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "GIVN" => self.given = Some(tokenizer.take_line_value()), + "NPFX" => self.prefix = Some(tokenizer.take_line_value()), + "NSFX" => self.suffix = Some(tokenizer.take_line_value()), + "SPFX" => self.surname_prefix = Some(tokenizer.take_line_value()), + "SURN" => self.surname = Some(tokenizer.take_line_value()), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Name Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -692,34 +616,19 @@ impl Parser for AttributeDetail { tokenizer.next_token(); } - loop { - if let Token::Level(cur_level) = &tokenizer.current_token { - if cur_level <= &level { - break; - } - } - // tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), - "PLAC" => self.place = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "TYPE" => 
self.attribute_type = Some(tokenizer.take_continued_text(level + 1)), - _ => panic!( - "{}, Unhandled AttributeDetail tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{}, Unhandled AttributeDetail token: {:?}", - tokenizer.debug(), - tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "SOUR" => self.add_source_citation(SourceCitation::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "TYPE" => self.attribute_type = Some(tokenizer.take_continued_text(level + 1)), + _ => panic!( + "{}, Unhandled AttributeDetail tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); if &value != "" { self.value = Some(value); diff --git a/src/types/multimedia.rs b/src/types/multimedia.rs index 12666a4..a47b761 100644 --- a/src/types/multimedia.rs +++ b/src/types/multimedia.rs @@ -1,5 +1,6 @@ use crate::{ - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::{ChangeDate, Note, SourceCitation, Xref}, Parser, }; @@ -44,7 +45,7 @@ use crate::{ /// let rin = obje.automated_record_id.as_ref().unwrap(); /// assert_eq!(rin, "Automated Id"); /// ``` -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct MultimediaRecord { /// Optional reference to link to this submitter @@ -66,17 +67,8 @@ pub struct MultimediaRecord { impl MultimediaRecord { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> MultimediaRecord { - let mut obje = MultimediaRecord { - xref, - file: None, - form: None, - title: None, - user_reference_number: None, - automated_record_id: None, - source_citation: None, - change_date: None, - note_structure: None, - }; + let mut obje = 
MultimediaRecord::default(); + obje.xref = xref; obje.parse(tokenizer, level); obje } @@ -86,33 +78,21 @@ impl Parser for MultimediaRecord { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip current line tokenizer.next_token(); - loop { - if let Token::Level(curl_level) = tokenizer.current_token { - if curl_level <= level { - break; - } - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), - "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), - "TITL" => self.title = Some(tokenizer.take_line_value()), - "REFN" => { - self.user_reference_number = - Some(UserReferenceNumber::new(tokenizer, level + 1)) - } - "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()), - "NOTE" => self.note_structure = Some(Note::new(tokenizer, level + 1)), - "SOUR" => { - self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)) - } - "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Multimedia Tag: {}", tokenizer.debug(), tag), - }, - _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), + + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + "TITL" => self.title = Some(tokenizer.take_line_value()), + "REFN" => { + self.user_reference_number = Some(UserReferenceNumber::new(tokenizer, level + 1)) } - } + "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()), + "NOTE" => self.note_structure = Some(Note::new(tokenizer, level + 1)), + "SOUR" => self.source_citation = Some(SourceCitation::new(tokenizer, level + 1)), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Multimedia Tag: {}", 
tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -185,23 +165,14 @@ impl Parser for MultimediaLink { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip current line tokenizer.next_token(); - loop { - if let Token::Level(curl_level) = tokenizer.current_token { - if curl_level <= level { - break; - } - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), - "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), - "TITL" => self.title = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Multimedia Tag: {}", tokenizer.debug(), tag), - }, - _ => panic!("Unhandled Multimedia Token: {:?}", tokenizer.current_token), - } - } + + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "FILE" => self.file = Some(MultimediaFileRefn::new(tokenizer, level + 1)), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + "TITL" => self.title = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Multimedia Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -260,29 +231,16 @@ impl MultimediaFileRefn { impl Parser for MultimediaFileRefn { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(curl_level) = &tokenizer.current_token { - if curl_level <= &level { - break; - } - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "TITL" => self.title = Some(tokenizer.take_line_value()), - "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), - _ => panic!( - "{} Unhandled MultimediaFileRefn Tag: {}", - tokenizer.debug(), - tag - ), - }, - _ => panic!( - "Unhandled MultimediaFileRefn Token: {:?}", - tokenizer.current_token - ), - } - } 
+ let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "TITL" => self.title = Some(tokenizer.take_line_value()), + "FORM" => self.form = Some(MultimediaFormat::new(tokenizer, level + 1)), + _ => panic!( + "{} Unhandled MultimediaFileRefn Tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -337,28 +295,16 @@ impl MultimediaFormat { impl Parser for MultimediaFormat { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(curl_level) = &tokenizer.current_token { - if curl_level <= &level { - break; - } - } - tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "TYPE" => self.source_media_type = Some(tokenizer.take_line_value()), - _ => panic!( - "{} Unhandled MultimediaFormat Tag: {}", - tokenizer.debug(), - tag - ), - }, - _ => panic!( - "Unhandled MultimediaFormat Token: {:?}", - tokenizer.current_token - ), - } - } + + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "TYPE" => self.source_media_type = Some(tokenizer.take_line_value()), + _ => panic!( + "{} Unhandled MultimediaFormat Tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -416,27 +362,14 @@ impl Parser for UserReferenceNumber { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(curl_level) = &tokenizer.current_token { - if curl_level <= &level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "TYPE" => self.user_reference_type = Some(tokenizer.take_line_value()), - _ => panic!( - "{} Unhandled UserReferenceNumber Tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled UserReferenceNumber Token: {:?}", - tokenizer.current_token - ), 
- } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "TYPE" => self.user_reference_type = Some(tokenizer.take_line_value()), + _ => panic!( + "{} Unhandled UserReferenceNumber Tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/note.rs b/src/types/note.rs index 9b21008..858b331 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -1,5 +1,6 @@ use crate::{ - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::{Source, Translation}, Parser, }; @@ -82,23 +83,12 @@ impl Parser for Note { /// parse handles the NOTE tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { self.value = Some(tokenizer.take_continued_text(level)); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MIME" => self.mime = Some(tokenizer.take_line_value()), - "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), - "LANG" => self.language = Some(tokenizer.take_line_value()), - _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "MIME" => self.mime = Some(tokenizer.take_line_value()), + "TRANS" => self.translation = Some(Translation::new(tokenizer, level + 1)), + "LANG" => self.language = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/repository.rs b/src/types/repository.rs index 9b916a0..6a1a941 100644 --- a/src/types/repository.rs +++ b/src/types/repository.rs @@ -1,12 +1,13 @@ use crate::{ + parse_subset, + tokenizer::Tokenizer, Parser, - 
tokenizer::{Token, Tokenizer}, }; use super::{Address, Xref}; /// Data repository, the `REPO` tag -#[derive(Debug)] +#[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Repository { /// Optional reference to link to this repo @@ -20,11 +21,8 @@ pub struct Repository { impl Repository { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Repository { - let mut repo = Repository { - xref, - name: None, - address: None, - }; + let mut repo = Repository::default(); + repo.xref = xref; repo.parse(tokenizer, level); repo } @@ -36,27 +34,17 @@ impl Parser for Repository { // skip REPO tag tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => self.name = Some(tokenizer.take_line_value()), - "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Repository Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Repository Token: {:?}", tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "NAME" => self.name = Some(tokenizer.take_line_value()), + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Repository Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } /// Citation linking a `Source` to a data `Repository` -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct RepoCitation { /// Reference to the `Repository` @@ -67,10 +55,8 @@ pub struct RepoCitation { impl RepoCitation { pub fn new(tokenizer: &mut Tokenizer, level: u8) -> RepoCitation { - let mut rc = RepoCitation { - xref: tokenizer.take_line_value(), - call_number: 
None, - }; + let mut rc = RepoCitation::default(); + rc.xref = tokenizer.take_line_value(); rc.parse(tokenizer, level); rc } @@ -78,23 +64,10 @@ impl RepoCitation { impl Parser for RepoCitation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CALN" => self.call_number = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled RepoCitation Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unhandled RepoCitation Token: {:?}", - tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "CALN" => self.call_number = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled RepoCitation Tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/source.rs b/src/types/source.rs index 8c9cdb7..3f23420 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -1,4 +1,5 @@ use crate::{ + parse_subset, tokenizer::{Token, Tokenizer}, types::{ ChangeDate, Date, EventDetail, MultimediaRecord, Note, RepoCitation, UserDefinedData, Xref, @@ -10,7 +11,7 @@ use crate::{ use serde::{Deserialize, Serialize}; /// Source for genealogy facts -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Source { pub xref: Option, @@ -20,7 +21,7 @@ pub struct Source { pub author: Option, pub publication_facts: Option, pub citation_from_source: Option, - pub date: Option>, + pub change_date: Option>, pub multimedia: Vec, pub notes: Vec, pub repo_citations: Vec, @@ -30,31 +31,12 @@ pub struct Source { impl Source { #[must_use] pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Source { - let mut sour = Source { - xref, - data: 
SourceData { - events: Vec::new(), - agency: None, - }, - abbreviation: None, - title: None, - date: None, - author: None, - publication_facts: None, - citation_from_source: None, - multimedia: Vec::new(), - notes: Vec::new(), - repo_citations: Vec::new(), - custom_data: Vec::new(), - }; + let mut sour = Source::default(); + sour.xref = xref; sour.parse(tokenizer, level); sour } - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data); - } - pub fn add_multimedia(&mut self, media: MultimediaRecord) { self.multimedia.push(media); } @@ -73,59 +55,41 @@ impl Parser for Source { // skip SOUR tag tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| { let mut pointer: Option = None; if let Token::Pointer(xref) = &tokenizer.current_token { pointer = Some(xref.to_string()); tokenizer.next_token(); } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATA" => tokenizer.next_token(), - "EVEN" => { - let events_recorded = tokenizer.take_line_value(); - let mut event = EventDetail::new(tokenizer, level + 2, "OTHER"); - event.with_source_data(events_recorded); - self.data.add_event(event); - } - "AGNC" => self.data.agency = Some(tokenizer.take_line_value()), - "ABBR" => self.abbreviation = Some(tokenizer.take_continued_text(level + 1)), - "CHAN" => self.date = Some(Box::new(ChangeDate::new(tokenizer, level + 1))), - "TITL" => self.title = Some(tokenizer.take_continued_text(level + 1)), - "AUTH" => self.author = Some(tokenizer.take_continued_text(level + 1)), - "PUBL" => { - self.publication_facts = Some(tokenizer.take_continued_text(level + 1)) - } - "TEXT" => { - self.citation_from_source = Some(tokenizer.take_continued_text(level + 1)) - } - "OBJE" => { - self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)) - } - "NOTE" => 
self.add_note(Note::new(tokenizer, level + 1)), - "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), - _ => panic!("{} Unhandled Source Tag: {}", tokenizer.debug(), tag), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); + match tag { + "DATA" => tokenizer.next_token(), + "EVEN" => { + let events_recorded = tokenizer.take_line_value(); + let mut event = EventDetail::new(tokenizer, level + 2, "OTHER"); + event.with_source_data(events_recorded); + self.data.add_event(event); + } + "AGNC" => self.data.agency = Some(tokenizer.take_line_value()), + "ABBR" => self.abbreviation = Some(tokenizer.take_continued_text(level + 1)), + "CHAN" => self.change_date = Some(Box::new(ChangeDate::new(tokenizer, level + 1))), + "TITL" => self.title = Some(tokenizer.take_continued_text(level + 1)), + "AUTH" => self.author = Some(tokenizer.take_continued_text(level + 1)), + "PUBL" => self.publication_facts = Some(tokenizer.take_continued_text(level + 1)), + "TEXT" => { + self.citation_from_source = Some(tokenizer.take_continued_text(level + 1)) } - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Source Token: {:?}", tokenizer.current_token), + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)), + "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), + "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), + _ => panic!("{} Unhandled Source Tag: {}", tokenizer.debug(), tag), } - } + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } #[allow(clippy::module_name_repetitions)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct SourceData { events: Vec, @@ -188,44 +152,26 @@ impl SourceCitation { citation.parse(tokenizer, level); citation } - - pub fn add_custom_data(&mut self, data: UserDefinedData) { - 
self.custom_data.push(data) - } } impl Parser for SourceCitation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } + tokenizer.next_token(); - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "PAGE" => self.page = Some(tokenizer.take_continued_text(level + 1)), - "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "QUAY" => { - self.certainty_assessment = - Some(CertaintyAssessment::new(tokenizer, level + 1)) - } - _ => panic!( - "{} Unhandled SourceCitation Tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)) - } - Token::Level(_) => tokenizer.next_token(), - _ => panic!("Unhandled Citation Token: {:?}", tokenizer.current_token), + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "PAGE" => self.page = Some(tokenizer.take_continued_text(level + 1)), + "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "QUAY" => { + self.certainty_assessment = Some(CertaintyAssessment::new(tokenizer, level + 1)) } - } + _ => panic!( + "{} Unhandled SourceCitation Tag: {}", + tokenizer.debug(), + tag + ), + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } @@ -281,32 +227,16 @@ impl Parser for SourceCitationData { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { // skip because this DATA tag should have now line value tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - - tokenizer.next_token(); - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "DATE" => self.date = 
Some(Date::new(tokenizer, level + 1)), - "TEXT" => self.text = Some(TextFromSource::new(tokenizer, level + 1)), - _ => panic!( - "{} unhandled SourceCitationData tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unexpected SourceCitationData token: {:?}", - tokenizer.current_token - ), - } - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "TEXT" => self.text = Some(TextFromSource::new(tokenizer, level + 1)), + _ => panic!( + "{} unhandled SourceCitationData tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); } } @@ -363,35 +293,19 @@ impl Parser for TextFromSource { let mut value = String::new(); value.push_str(&tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - - tokenizer.next_token(); - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "CONC" => value.push_str(&tokenizer.take_line_value()), - "CONT" => { - value.push('\n'); - value.push_str(&tokenizer.take_line_value()); - } - _ => panic!( - "{} unhandled TextFromSource tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "Unexpected TextFromSource token: {:?}", - &tokenizer.current_token - ), - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "CONC" => value.push_str(&tokenizer.take_line_value()), + "CONT" => { + value.push('\n'); + value.push_str(&tokenizer.take_line_value()); } - } + _ => panic!( + "{} unhandled TextFromSource tag: {}", + tokenizer.debug(), + tag + ), + }; + parse_subset(tokenizer, level, handle_subset); if value != "" { self.value = Some(value); @@ -421,7 +335,7 @@ impl Parser for TextFromSource { /// 0 @PERSON1@ INDI\n\ /// 1 SOUR @SOURCE1@\n\ /// 2 PAGE 42\n\ -/// 2 QUAY 1 +/// 2 QUAY 1\n\ /// 
0 TRLR"; /// /// let mut ged = GedcomDocument::new(sample.chars()); diff --git a/src/types/submission.rs b/src/types/submission.rs index aaf300f..7b84afa 100644 --- a/src/types/submission.rs +++ b/src/types/submission.rs @@ -1,5 +1,6 @@ use crate::{ - tokenizer::{Token, Tokenizer}, + parse_subset, + tokenizer::Tokenizer, types::{ChangeDate, Note, UserDefinedData, Xref}, Parser, }; @@ -7,7 +8,7 @@ use crate::{ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -/// SubmissionRecord is used by the sending system to send instructions and information to the +/// Submission is used by the sending system to send instructions and information to the /// receiving system. The sending system uses a submission record to send instructions and /// information to the receiving system. The submission record is also used for communication /// between Ancestral File download requests and TempleReady. Each GEDCOM transmission file should @@ -37,7 +38,7 @@ use serde::{Deserialize, Serialize}; /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct SubmissionRecord { +pub struct Submission { pub xref: Option, pub name_of_family_file: Option, pub temple_code: Option, @@ -51,70 +52,36 @@ pub struct SubmissionRecord { pub custom_data: Vec, } -impl SubmissionRecord { +impl Submission { #[must_use] - pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> SubmissionRecord { - let mut subn = SubmissionRecord { - xref, - name_of_family_file: None, - submitter_link: None, - generations_of_ancestors: None, - generations_of_descendants: None, - ordinance_process_flag: None, - automated_record_id: None, - temple_code: None, - note: None, - change_date: None, - custom_data: Vec::new(), - }; + pub fn new(tokenizer: &mut Tokenizer, level: u8, xref: Option) -> Submission { + let mut subn = Submission::default(); + subn.xref = xref; subn.parse(tokenizer, level); subn } - - pub fn add_custom_data(&mut self, data: UserDefinedData) 
{ - self.custom_data.push(data) - } } -impl Parser for SubmissionRecord { +impl Parser for Submission { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); - loop { - if let Token::Level(cur_level) = &tokenizer.current_token { - if cur_level <= &level { - break; - } - } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "ANCE" => self.generations_of_ancestors = Some(tokenizer.take_line_value()), - "DATE" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - "DESC" => self.generations_of_descendants = Some(tokenizer.take_line_value()), - "FAMF" => self.name_of_family_file = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "ORDI" => self.ordinance_process_flag = Some(tokenizer.take_line_value()), - "RIN" => self.automated_record_id = Some(tokenizer.take_line_value()), - "SUBM" => self.submitter_link = Some(tokenizer.take_line_value()), - "TEMP" => self.temple_code = Some(tokenizer.take_line_value()), - _ => panic!( - "{}, Unhandled SubmissionRecord tag: {}", - tokenizer.debug(), - tag - ), - }, - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); - } - Token::Level(_) => tokenizer.next_token(), - _ => panic!( - "{}, Unhandled SubmissionRecord: {:?}", - tokenizer.debug(), - &tokenizer.current_token - ), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "ANCE" => self.generations_of_ancestors = Some(tokenizer.take_line_value()), + "DATE" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "DESC" => self.generations_of_descendants = Some(tokenizer.take_line_value()), + "FAMF" => self.name_of_family_file = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "ORDI" => self.ordinance_process_flag = Some(tokenizer.take_line_value()), + "RIN" => self.automated_record_id = 
Some(tokenizer.take_line_value()), + "SUBM" => self.submitter_link = Some(tokenizer.take_line_value()), + "TEMP" => self.temple_code = Some(tokenizer.take_line_value()), + _ => panic!( + "{}, Unhandled SubmissionRecord tag: {}", + tokenizer.debug(), + tag + ), + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 15755fd..0959e4f 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,7 +1,7 @@ use crate::{ - Parser, tokenizer::{Token, Tokenizer}, - types::{Address, ChangeDate, UserDefinedData, MultimediaLink, Note, Xref}, + types::{Address, ChangeDate, MultimediaLink, Note, UserDefinedData, Xref}, + Parser, parse_subset, }; #[cfg(feature = "json")] @@ -54,12 +54,6 @@ impl Submitter { pub fn add_multimedia(&mut self, multimedia: MultimediaLink) { self.multimedia.push(multimedia); } - - - /// - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data) - } } impl Parser for Submitter { @@ -68,33 +62,23 @@ impl Parser for Submitter { // skip over SUBM tag name tokenizer.next_token(); - while tokenizer.current_token != Token::Level(level) { + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| { let mut pointer: Option = None; if let Token::Pointer(xref) = &tokenizer.current_token { pointer = Some(xref.to_string()); tokenizer.next_token(); } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "NAME" => self.name = Some(tokenizer.take_line_value()), - "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), - "OBJE" => { - self.add_multimedia(MultimediaLink::new(tokenizer, level + 1, pointer)) - } - "LANG" => self.language = Some(tokenizer.take_line_value()), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), - "PHON" => self.phone = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Submitter 
Tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - Token::CustomTag(tag) => { - let tag_clone = tag.clone(); - self.add_custom_data(tokenizer.parse_custom_tag(tag_clone)); - } - _ => panic!("Unhandled Submitter Token: {:?}", tokenizer.current_token), + match tag { + "NAME" => self.name = Some(tokenizer.take_line_value()), + "ADDR" => self.address = Some(Address::new(tokenizer, level + 1)), + "OBJE" => self.add_multimedia(MultimediaLink::new(tokenizer, level + 1, pointer)), + "LANG" => self.language = Some(tokenizer.take_line_value()), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "CHAN" => self.change_date = Some(ChangeDate::new(tokenizer, level + 1)), + "PHON" => self.phone = Some(tokenizer.take_line_value()), + _ => panic!("{} Unhandled Submitter Tag: {}", tokenizer.debug(), tag), } - } + }; + self.custom_data = parse_subset(tokenizer, level, handle_subset); } } diff --git a/src/types/translation.rs b/src/types/translation.rs index 78f464b..cb94b4b 100644 --- a/src/types/translation.rs +++ b/src/types/translation.rs @@ -2,8 +2,8 @@ use serde::{Deserialize, Serialize}; use crate::{ - Parser, - tokenizer::{Token, Tokenizer}, + tokenizer::Tokenizer, + Parser, parse_subset, }; /// Translation (tag:TRAN) is a type of TRAN for unstructured human-readable text, such as @@ -30,28 +30,15 @@ impl Translation { } impl Parser for Translation { - ///parse handles the TRAN tag fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { - self.value = Some(tokenizer.take_line_value()); - loop { - if let Token::Level(cur_level) = tokenizer.current_token { - if cur_level <= level { - break; - } - } - - match &tokenizer.current_token { - Token::Tag(tag) => match tag.as_str() { - "MIME" => self.mime = Some(tokenizer.take_line_value()), - "LANG" => self.language = Some(tokenizer.take_line_value()), - _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), - }, - Token::Level(_) => tokenizer.next_token(), - _ => 
panic!("Unexpected NOTE token: {:?}", &tokenizer.current_token), - } - } + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { + "MIME" => self.mime = Some(tokenizer.take_line_value()), + "LANG" => self.language = Some(tokenizer.take_line_value()), + _ => panic!("{} unhandled NOTE tag: {}", tokenizer.debug(), tag), + }; + parse_subset(tokenizer, level, handle_subset); } } From 69706e4493caeb48ef0a337bd5c7486c248b62df Mon Sep 17 00:00:00 2001 From: ge3224 Date: Sat, 3 Dec 2022 15:12:35 -0600 Subject: [PATCH 53/55] Handle possible subset data of UserDefinedDatasets --- src/lib.rs | 34 ++++++------ src/tokenizer.rs | 17 +++--- src/types/address.rs | 4 +- src/types/custom.rs | 114 +++++++++++++++++++++++++++++++++++++++- src/types/event.rs | 38 ++++++++++---- src/types/family.rs | 4 +- src/types/header.rs | 4 +- src/types/individual.rs | 10 ++-- src/types/note.rs | 3 +- src/types/source.rs | 50 +++++++++++++----- src/types/submission.rs | 4 +- src/types/submitter.rs | 4 +- 12 files changed, 216 insertions(+), 70 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8d4d318..21f983e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ use tokenizer::{Token, Tokenizer}; pub mod types; use types::{ Family, Header, Individual, MultimediaRecord, Repository, Source, Submission, Submitter, - UserDefinedData, + UserDefinedDataset, }; /// The GedcomDocument can convert the token list into a data structure. 
The order of the Dataset @@ -94,11 +94,11 @@ pub fn parse_subset( tokenizer: &mut Tokenizer, level: u8, mut tag_handler: F, -) -> Vec +) -> Vec> where F: FnMut(&str, &mut Tokenizer), { - let mut custom_data = Vec::new(); + let mut non_standard_dataset = Vec::new(); loop { if let Token::Level(curl_level) = tokenizer.current_token { if curl_level <= level { @@ -113,7 +113,12 @@ where } Token::CustomTag(tag) => { let tag_clone = tag.clone(); - custom_data.push(parse_custom_tag(tokenizer, tag_clone)); + non_standard_dataset.push(Box::new(UserDefinedDataset::new( + tokenizer, + level + 1, + &tag_clone, + ))); + // custom_data.push(parse_custom_tag(tokenizer, tag_clone)); } Token::Level(_) => tokenizer.next_token(), _ => panic!( @@ -123,13 +128,7 @@ where ), } } - custom_data -} - -/// parse_custom_tag handles User Defined Data. See Gedcom 5.5 spec, p.56 -pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> UserDefinedData { - let value = tokenizer.take_line_value(); - UserDefinedData { tag, value } + non_standard_dataset } /// GedcomData is the data structure representing all the data within a gedcom file @@ -168,10 +167,6 @@ pub fn parse_custom_tag(tokenizer: &mut Tokenizer, tag: String) -> UserDefinedDa /// /// assert_eq!(data.sources.len(), 1); /// assert_eq!(data.sources[0].xref.as_ref().unwrap(), "@SOURCE1@"); -/// -/// assert_eq!(data.custom_data.len(), 1); -/// assert_eq!(data.custom_data[0].tag, "_MYOWNTAG"); -/// assert_eq!(data.custom_data[0].value, "This is a non-standard tag. Not recommended but allowed"); /// ``` #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -196,7 +191,7 @@ pub struct GedcomData { /// so that they will not conflict with future GEDCOM standard tags. Systems that read /// user-defined tags must consider that they have meaning only with respect to a system /// contained in the HEAD.SOUR context. 
- pub custom_data: Vec, + pub custom_data: Vec>, } // should maybe store these by xref if available? @@ -245,8 +240,8 @@ impl GedcomData { } /// Adds a `UserDefinedData` to the tree - pub fn add_custom_data(&mut self, data: UserDefinedData) { - self.custom_data.push(data) + pub fn add_custom_data(&mut self, non_standard_data: UserDefinedDataset) { + self.custom_data.push(Box::new(non_standard_data)); } /// Outputs a summary of data contained in the tree to stdout @@ -308,7 +303,8 @@ impl Parser for GedcomData { }; } else if let Token::CustomTag(tag) = &tokenizer.current_token { let tag_clone = tag.clone(); - self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); + self.add_custom_data(UserDefinedDataset::new(tokenizer, level + 1, &tag_clone)); + // self.add_custom_data(parse_custom_tag(tokenizer, tag_clone)); while tokenizer.current_token != Token::Level(level) { tokenizer.next_token(); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 32cf8be..645e062 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -160,19 +160,22 @@ impl<'a> Tokenizer<'a> { /// Grabs and returns to the end of the current line as a String pub fn take_line_value(&mut self) -> String { - let value: String; + let mut value = String::from(""); self.next_token(); - if let Token::LineValue(val) = &self.current_token { - value = val.to_string(); - } else { - panic!( + match &self.current_token { + Token::LineValue(val) => { + value = val.to_string(); + self.next_token(); + } + // gracefully handle an attempt to take a value from a valueless line + Token::Level(_) => (), + _ => panic!( "{} Expected LineValue, found {:?}", self.debug(), self.current_token - ); + ), } - self.next_token(); value } diff --git a/src/types/address.rs b/src/types/address.rs index b5f11fc..3cf5433 100644 --- a/src/types/address.rs +++ b/src/types/address.rs @@ -5,7 +5,7 @@ use std::fmt; use crate::{ parse_subset, tokenizer::{Token, Tokenizer}, - types::UserDefinedData, + types::UserDefinedDataset, Parser, }; 
@@ -21,7 +21,7 @@ pub struct Address { pub state: Option, pub post: Option, pub country: Option, - pub custom_data: Vec, + pub custom_data: Vec>, } impl Address { diff --git a/src/types/custom.rs b/src/types/custom.rs index e602117..1a2d3c5 100644 --- a/src/types/custom.rs +++ b/src/types/custom.rs @@ -1,6 +1,116 @@ +use crate::{ + tokenizer::{Token, Tokenizer}, + Parser, +}; + +/// UserDefinedData handles User Defined Data. See Gedcom 5.5 spec, p.56 +/// +/// ``` +/// use gedcom::GedcomDocument; +/// +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @S1207169483@ SOUR\n\ +/// 1 TITL New York, U.S., New York National Guard Service Cards, 1917-1954\n\ +/// 0 @P10@ INDI\n\ +/// 1 _MILT \n\ +/// 2 DATE 3 Nov 1947\n\ +/// 2 PLAC Rochester, New York, USA\n\ +/// 2 SOUR @S1207169483@\n\ +/// 3 PAGE New York State Archives; Albany, New York; Collection: New York, New York National Guard Service Cards, 1917-1954; Series: Xxxxx; Film Number: Xx\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let custom = &data.individuals[0].custom_data; +/// assert_eq!(custom.len(), 1); +/// assert_eq!(custom[0].as_ref().tag, "_MILT"); +/// +/// let cs_date = custom[0].as_ref().children[0].as_ref(); +/// assert_eq!(cs_date.tag, "DATE"); +/// assert_eq!(cs_date.value.as_ref().unwrap(), "3 Nov 1947"); +/// +/// let cs_plac = custom[0].as_ref().children[1].as_ref(); +/// assert_eq!(cs_plac.tag, "PLAC"); +/// assert_eq!(cs_plac.value.as_ref().unwrap(), "Rochester, New York, USA"); +/// +/// let cs_sour = custom[0].as_ref().children[2].as_ref(); +/// assert_eq!(cs_sour.tag, "SOUR"); +/// assert_eq!(cs_sour.value.as_ref().unwrap(), "@S1207169483@"); +/// +/// let cs_sour_page = cs_sour.children[0].as_ref(); +/// assert_eq!(cs_sour_page.tag, "PAGE"); +/// assert_eq!(cs_sour_page.value.as_ref().unwrap(), "New York State Archives; Albany, New York; Collection: New York, New York National Guard 
Service Cards, 1917-1954; Series: Xxxxx; Film Number: Xx"); +/// ``` #[derive(Clone, Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] -pub struct UserDefinedData { +pub struct UserDefinedDataset { pub tag: String, - pub value: String, + pub value: Option, + pub children: Vec>, +} + +impl UserDefinedDataset { + #[must_use] + pub fn new(tokenizer: &mut Tokenizer, level: u8, tag: &str) -> UserDefinedDataset { + let mut udd = UserDefinedDataset { + tag: tag.to_string(), + value: None, + children: Vec::new(), + }; + udd.parse(tokenizer, level); + udd + } + + pub fn add_child(&mut self, child: UserDefinedDataset) { + self.children.push(Box::new(child)); + } +} + +impl Parser for UserDefinedDataset { + fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { + // skip ahead of initial tag + tokenizer.next_token(); + + let mut has_child = false; + loop { + if let Token::Level(current) = tokenizer.current_token { + if current <= level { + break; + } + if current > level { + has_child = true; + } + } + + match &tokenizer.current_token { + Token::Tag(tag) => { + if has_child { + let tag_clone = tag.clone(); + self.add_child(UserDefinedDataset::new(tokenizer, level + 1, &tag_clone)) + } + } + Token::CustomTag(tag) => { + if has_child { + let tag_clone = tag.clone(); + self.add_child(UserDefinedDataset::new(tokenizer, level + 1, &tag_clone)) + } + } + Token::LineValue(val) => { + self.value = Some(val.to_string()); + tokenizer.next_token(); + } + Token::Level(_) => tokenizer.next_token(), + Token::EOF => break, + _ => panic!( + "{}, Unhandled Token in UserDefinedDataset: {:?}", + tokenizer.debug(), + tokenizer.current_token + ), + } + } + } } diff --git a/src/types/event.rs b/src/types/event.rs index e4eacc0..7da7b62 100644 --- a/src/types/event.rs +++ b/src/types/event.rs @@ -1,7 +1,7 @@ use crate::{ parse_subset, tokenizer::{Token, Tokenizer}, - types::{Date, FamilyLink, Note, SourceCitation}, + types::{Date, FamilyLink, MultimediaRecord, Note, 
SourceCitation}, Parser, }; @@ -114,6 +114,7 @@ pub struct EventDetail { /// FACT tags are used. T. See GEDCOM 5.5 spec, page 35 and 49. pub event_type: Option, pub citations: Vec, + pub multimedia: Vec, } impl EventDetail { @@ -129,6 +130,7 @@ impl EventDetail { family_event_details: Vec::new(), event_type: None, citations: Vec::new(), + multimedia: Vec::new(), }; event.parse(tokenizer, level); event @@ -187,6 +189,10 @@ impl EventDetail { self.family_event_details.push(detail); } + pub fn add_multimedia_record(&mut self, m: MultimediaRecord) { + self.multimedia.push(m); + } + #[must_use] pub fn get_citations(&self) -> Vec { self.citations.clone() @@ -241,17 +247,27 @@ impl Parser for EventDetail { tokenizer.next_token(); } - let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { - "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), - "PLAC" => self.place = Some(tokenizer.take_line_value()), - "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), - "FAMC" => self.family_link = Some(FamilyLink::new(tokenizer, level + 1, tag)), - "HUSB" | "WIFE" => { - self.add_family_event_detail(FamilyEventDetail::new(tokenizer, level + 1, tag)); + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| { + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + match tag { + "DATE" => self.date = Some(Date::new(tokenizer, level + 1)), + "PLAC" => self.place = Some(tokenizer.take_line_value()), + "SOUR" => self.add_citation(SourceCitation::new(tokenizer, level + 1)), + "FAMC" => self.family_link = Some(FamilyLink::new(tokenizer, level + 1, tag)), + "HUSB" | "WIFE" => { + self.add_family_event_detail(FamilyEventDetail::new(tokenizer, level + 1, tag)); + } + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "TYPE" => self.event_type = Some(tokenizer.take_line_value()), + "OBJE" => { + 
self.add_multimedia_record(MultimediaRecord::new(tokenizer, level + 1, pointer)) + } + _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), } - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "TYPE" => self.event_type = Some(tokenizer.take_line_value()), - _ => panic!("{} Unhandled Event Tag: {}", tokenizer.debug(), tag), }; parse_subset(tokenizer, level, handle_subset); diff --git a/src/types/family.rs b/src/types/family.rs index e48e194..fb0ccc9 100644 --- a/src/types/family.rs +++ b/src/types/family.rs @@ -3,7 +3,7 @@ use crate::{ tokenizer::{Token, Tokenizer}, types::{ event::HasEvents, ChangeDate, EventDetail, MultimediaRecord, Note, SourceCitation, - UserDefinedData, Xref, + UserDefinedDataset, Xref, }, Parser, }; @@ -28,8 +28,8 @@ pub struct Family { pub events: Vec, pub sources: Vec, pub multimedia: Vec, - pub custom_data: Vec, pub notes: Vec, + pub custom_data: Vec>, } impl Family { diff --git a/src/types/header.rs b/src/types/header.rs index 2f90872..a11630d 100644 --- a/src/types/header.rs +++ b/src/types/header.rs @@ -7,7 +7,7 @@ use crate::{ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -use super::UserDefinedData; +use super::UserDefinedDataset; /// Header (tag: HEAD) containing GEDCOM metadata. 
/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#HEADER @@ -78,7 +78,7 @@ pub struct Header { pub note: Option, /// tag: PLAC pub place: Option, - pub custom_data: Vec, + pub custom_data: Vec>, } impl Header { diff --git a/src/types/individual.rs b/src/types/individual.rs index 8f21f22..d746230 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -3,7 +3,7 @@ use crate::{ tokenizer::{Token, Tokenizer}, types::{ event::HasEvents, ChangeDate, Date, EventDetail, MultimediaRecord, Note, SourceCitation, - UserDefinedData, Xref, + UserDefinedDataset, Xref, }, Parser, }; @@ -19,7 +19,6 @@ pub struct Individual { pub name: Option, pub sex: Option, pub families: Vec, - pub custom_data: Vec, pub attributes: Vec, pub source: Vec, pub events: Vec, @@ -27,6 +26,7 @@ pub struct Individual { pub last_updated: Option, pub note: Option, pub change_date: Option, + pub custom_data: Vec>, } impl Individual { @@ -161,8 +161,6 @@ impl ToString for GenderType { /// assert_eq!(sex.fact.as_ref().unwrap(), "A fact about an individual's gender"); /// assert_eq!(sex.sources[0].xref, "@CITATION1@"); /// assert_eq!(sex.sources[0].page.as_ref().unwrap(), "Page: 132"); -/// assert_eq!(sex.sources[0].custom_data[0].tag, "_MYOWNTAG"); -/// assert_eq!(sex.sources[0].custom_data[0].value, "This is a non-standard tag. 
Not recommended but allowed"); /// ``` #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] @@ -170,7 +168,7 @@ pub struct Gender { pub value: GenderType, pub fact: Option, pub sources: Vec, - pub custom_data: Vec, + pub custom_data: Vec>, } impl Gender { @@ -340,7 +338,7 @@ pub struct FamilyLink { pub child_linkage_status: Option, pub adopted_by: Option, pub note: Option, - pub custom_data: Vec, + pub custom_data: Vec>, } impl FamilyLink { diff --git a/src/types/note.rs b/src/types/note.rs index 858b331..6f187d3 100644 --- a/src/types/note.rs +++ b/src/types/note.rs @@ -40,6 +40,7 @@ use serde::{Deserialize, Serialize}; /// 2 CONT Some Specials: This line is very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very very long but not too long (255 caharcters is the limit).\n\ /// 2 CONT This @@ (commercial at) character may only appear ONCE!\n\ /// 2 CONT Note continued here. 
The word TE\n\ +/// 2 CONT /// 2 CONC ST should not be broken!\n\ /// 0 TRLR"; @@ -47,7 +48,7 @@ use serde::{Deserialize, Serialize}; /// let data = doc.parse_document(); /// let note = data.header.unwrap().note.unwrap(); -/// assert_eq!(note.value.unwrap().chars().count(), 1440); +/// assert_eq!(note.value.unwrap().chars().count(), 1441); /// ``` #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] diff --git a/src/types/source.rs b/src/types/source.rs index 3f23420..bb8bb13 100644 --- a/src/types/source.rs +++ b/src/types/source.rs @@ -2,7 +2,8 @@ use crate::{ parse_subset, tokenizer::{Token, Tokenizer}, types::{ - ChangeDate, Date, EventDetail, MultimediaRecord, Note, RepoCitation, UserDefinedData, Xref, + ChangeDate, Date, EventDetail, MultimediaRecord, Note, RepoCitation, UserDefinedDataset, + Xref, }, Parser, }; @@ -25,7 +26,9 @@ pub struct Source { pub multimedia: Vec, pub notes: Vec, pub repo_citations: Vec, - pub custom_data: Vec, + /// handles "RFN" tag; found in Ancestry.com export + pub submitter_registered_rfn: Option, + pub custom_data: Vec>, } impl Source { @@ -81,6 +84,7 @@ impl Parser for Source { "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)), "NOTE" => self.add_note(Note::new(tokenizer, level + 1)), "REPO" => self.add_repo_citation(RepoCitation::new(tokenizer, level + 1)), + "RFN" => self.submitter_registered_rfn = Some(tokenizer.take_line_value()), _ => panic!("{} Unhandled Source Tag: {}", tokenizer.debug(), tag), } }; @@ -135,7 +139,10 @@ pub struct SourceCitation { pub data: Option, pub note: Option, pub certainty_assessment: Option, - pub custom_data: Vec, + /// handles "RFN" tag; found in Ancestry.com export + pub submitter_registered_rfn: Option, + pub multimedia: Vec, + pub custom_data: Vec>, } impl SourceCitation { @@ -147,29 +154,44 @@ impl SourceCitation { data: None, note: None, certainty_assessment: None, + multimedia: Vec::new(), custom_data: Vec::new(), 
+ submitter_registered_rfn: None, }; citation.parse(tokenizer, level); citation } + + pub fn add_multimedia(&mut self, m: MultimediaRecord) { + self.multimedia.push(m); + } } impl Parser for SourceCitation { fn parse(&mut self, tokenizer: &mut Tokenizer, level: u8) { tokenizer.next_token(); - let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| match tag { - "PAGE" => self.page = Some(tokenizer.take_continued_text(level + 1)), - "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), - "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), - "QUAY" => { - self.certainty_assessment = Some(CertaintyAssessment::new(tokenizer, level + 1)) + let handle_subset = |tag: &str, tokenizer: &mut Tokenizer| { + let mut pointer: Option = None; + if let Token::Pointer(xref) = &tokenizer.current_token { + pointer = Some(xref.to_string()); + tokenizer.next_token(); + } + match tag { + "PAGE" => self.page = Some(tokenizer.take_continued_text(level + 1)), + "DATA" => self.data = Some(SourceCitationData::new(tokenizer, level + 1)), + "NOTE" => self.note = Some(Note::new(tokenizer, level + 1)), + "QUAY" => { + self.certainty_assessment = Some(CertaintyAssessment::new(tokenizer, level + 1)) + } + "RFN" => self.submitter_registered_rfn = Some(tokenizer.take_line_value()), + "OBJE" => self.add_multimedia(MultimediaRecord::new(tokenizer, level + 1, pointer)), + _ => panic!( + "{} Unhandled SourceCitation Tag: {}", + tokenizer.debug(), + tag + ), } - _ => panic!( - "{} Unhandled SourceCitation Tag: {}", - tokenizer.debug(), - tag - ), }; self.custom_data = parse_subset(tokenizer, level, handle_subset); } diff --git a/src/types/submission.rs b/src/types/submission.rs index 7b84afa..c92f656 100644 --- a/src/types/submission.rs +++ b/src/types/submission.rs @@ -1,7 +1,7 @@ use crate::{ parse_subset, tokenizer::Tokenizer, - types::{ChangeDate, Note, UserDefinedData, Xref}, + types::{ChangeDate, Note, UserDefinedDataset, Xref}, Parser, }; @@ -49,7 +49,7 @@ pub 
struct Submission { pub automated_record_id: Option, pub note: Option, pub change_date: Option, - pub custom_data: Vec, + pub custom_data: Vec>, } impl Submission { diff --git a/src/types/submitter.rs b/src/types/submitter.rs index 0959e4f..199c8d1 100644 --- a/src/types/submitter.rs +++ b/src/types/submitter.rs @@ -1,6 +1,6 @@ use crate::{ tokenizer::{Token, Tokenizer}, - types::{Address, ChangeDate, MultimediaLink, Note, UserDefinedData, Xref}, + types::{Address, ChangeDate, MultimediaLink, Note, UserDefinedDataset, Xref}, Parser, parse_subset, }; @@ -37,7 +37,7 @@ pub struct Submitter { pub note: Option, /// Phone number of the submitter pub phone: Option, - pub custom_data: Vec, + pub custom_data: Vec>, } impl Submitter { From fd4586745c490b1ee1b419259de5283897083b81 Mon Sep 17 00:00:00 2001 From: ge3224 Date: Mon, 12 Dec 2022 16:54:58 -0600 Subject: [PATCH 54/55] Add more documentation individual types --- src/types/individual.rs | 62 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/types/individual.rs b/src/types/individual.rs index d746230..edfec15 100644 --- a/src/types/individual.rs +++ b/src/types/individual.rs @@ -11,7 +11,33 @@ use crate::{ #[cfg(feature = "json")] use serde::{Deserialize, Serialize}; -/// A Person within the family tree +/// Individual (tag: INDI) represents a compilation of facts or hypothesized facts about an +/// individual. These facts may come from multiple sources. Source citations and notes allow +/// documentation of the source where each of the facts were discovered. See +/// https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#INDIVIDUAL_RECORD. 
+/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 NAME John Doe\n\ +/// 1 SEX M\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let indi = &data.individuals[0]; +/// assert_eq!(indi.xref.as_ref().unwrap(), "@PERSON1@"); +/// assert_eq!(indi.name.as_ref().unwrap().value.as_ref().unwrap(), "John Doe"); +/// assert_eq!(indi.sex.as_ref().unwrap().value.to_string(), "Male"); +/// ``` +/// #[derive(Debug, Default)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Individual { @@ -302,7 +328,7 @@ impl ToString for AdoptedByWhichParent { /// /// # Example /// -/// ```rust +/// ``` /// use gedcom::GedcomDocument; /// let sample = "\ /// 0 HEAD\n\ @@ -411,6 +437,38 @@ impl Parser for FamilyLink { } } +/// Name (tag: NAME) refers to the names of individuals, which are represented in the manner the +/// name is normally spoken, with the family name, surname, or nearest cultural parallel thereunto +/// separated by slashes (U+002F /). Based on the dynamic nature or unknown compositions of naming +/// conventions, it is difficult to provide a more detailed name piece structure to handle every +/// case. The PERSONAL_NAME_PIECES are provided optionally for systems that cannot operate +/// effectively with less structured information. The Personal Name payload shall be seen as the +/// primary name representation, with name pieces as optional auxiliary information; in particular +/// it is recommended that all name parts in PERSONAL_NAME_PIECES appear within the PersonalName +/// payload in some form, possibly adjusted for gender-specific suffixes or the like. It is +/// permitted for the payload to contain information not present in any name piece substructure. 
+/// See https://gedcom.io/specifications/FamilySearchGEDCOMv7.html#PERSONAL_NAME_STRUCTURE +/// +/// # Example +/// +/// ``` +/// use gedcom::GedcomDocument; +/// let sample = "\ +/// 0 HEAD\n\ +/// 1 GEDC\n\ +/// 2 VERS 5.5\n\ +/// 0 @PERSON1@ INDI\n\ +/// 1 NAME John Doe\n\ +/// 0 TRLR"; +/// +/// let mut doc = GedcomDocument::new(sample.chars()); +/// let data = doc.parse_document(); +/// +/// let indi = &data.individuals[0]; +/// assert_eq!(indi.xref.as_ref().unwrap(), "@PERSON1@"); +/// assert_eq!(indi.name.as_ref().unwrap().value.as_ref().unwrap(), "John Doe"); +/// ``` +/// #[derive(Debug)] #[cfg_attr(feature = "json", derive(Serialize, Deserialize))] pub struct Name { From 7f2cc5510a88820d803d2c36a167fe886c22c640 Mon Sep 17 00:00:00 2001 From: Jacob <75739874+ge3224@users.noreply.github.com> Date: Thu, 5 Jan 2023 08:24:36 -0600 Subject: [PATCH 55/55] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0c749b2..aa5a074 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ gedcom = { version = "", features = ["json"] } ## 🚧 Progress 🚧 -There are still parts of the specification not yet implemented, and the project is subject to change. The way development has been happening is by taking a GEDCOM file, attempting to parse it and acting on whatever errors or omissions occur. In its current state, it is capable of parsing the [Heiner Eichmann's](http://heiner-eichmann.de/gedcom/allged.htm) [`allged.ged`](tests/fixtures/allged.ged) in its entirety. +There are still parts of the specification not yet implemented, and the project is subject to change. The way development has been happening is by taking a GEDCOM file, attempting to parse it and acting on whatever errors or omissions occur. In its current state, it is capable of parsing [Heiner Eichmann's](http://heiner-eichmann.de/gedcom/allged.htm) [`allged.ged`](tests/fixtures/allged.ged) in its entirety. 
Here are some notes about parsed data & tags. Page references are to the [Gedcom 5.5.1 specification](https://edge.fscdn.org/assets/img/documents/ged551-5bac5e57fe88dd37df0e153d9c515335.pdf).