Skip to content

Commit e31b2bf

Browse files
committed
Add trim option to deal with pretty-printed XML files
1 parent 004c100 commit e31b2bf

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

src/main.rs

+11-2
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ struct Column<'a> {
6868
hide: bool,
6969
include: Option<Regex>,
7070
exclude: Option<Regex>,
71+
trim: bool,
7172
convert: Option<&'a str>,
7273
find: Option<&'a str>,
7374
replace: Option<&'a str>,
@@ -200,6 +201,7 @@ fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, skip: Opt
200201
let hide = col["hide"].as_bool().unwrap_or(false);
201202
let include: Option<Regex> = col["incl"].as_str().map(|str| Regex::new(str).unwrap_or_else(|err| fatalerr!("Error: invalid regex in 'incl' entry in configuration file: {}", err)));
202203
let exclude: Option<Regex> = col["excl"].as_str().map(|str| Regex::new(str).unwrap_or_else(|err| fatalerr!("Error: invalid regex in 'excl' entry in configuration file: {}", err)));
204+
let trim = col["trim"].as_bool().unwrap_or(false);
203205
let attr = col["attr"].as_str();
204206
let convert = col["conv"].as_str();
205207
let find = col["find"].as_str();
@@ -231,7 +233,7 @@ fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, skip: Opt
231233
eprintln!("Warning: the bbox option has no function without conversion type 'gml-to-ekwb'");
232234
}
233235

234-
let column = Column { name: name.to_string(), path, value: RefCell::new(String::new()), attr, hide, include, exclude, convert, find, replace, consol, subtable, bbox, multitype };
236+
let column = Column { name: name.to_string(), path, value: RefCell::new(String::new()), attr, hide, include, exclude, trim, convert, find, replace, consol, subtable, bbox, multitype };
235237
table.columns.push(column);
236238
}
237239
table
@@ -285,6 +287,7 @@ fn main() {
285287
let mut gmltoewkb = false;
286288
let mut gmlpos = false;
287289
let mut gmlcoll: Vec<Geometry> = vec![];
290+
let trimre = Regex::new("[ \n\r\t]*\n[ \n\r\t]*").unwrap();
288291

289292
let start = Instant::now();
290293
loop {
@@ -452,7 +455,13 @@ fn main() {
452455
}
453456
let unescaped = e.unescaped().unwrap_or_else(|err| fatalerr!("Error: failed to unescape XML text node '{}': {}", String::from_utf8_lossy(e), err));
454457
let decoded = reader.decode(&unescaped).unwrap_or_else(|err| fatalerr!("Error: failed to decode XML text node '{}': {}", String::from_utf8_lossy(e), err));
455-
table.columns[i].value.borrow_mut().push_str(&decoded.cow_replace("\\", "\\\\").cow_replace("\r", "\\r").cow_replace("\n", "\\n").cow_replace("\t", "\\t"));
458+
if table.columns[i].trim {
459+
let trimmed = trimre.replace_all(decoded, " ");
460+
table.columns[i].value.borrow_mut().push_str(&trimmed.cow_replace("\\", "\\\\").cow_replace("\t", "\\t"));
461+
}
462+
else {
463+
table.columns[i].value.borrow_mut().push_str(&decoded.cow_replace("\\", "\\\\").cow_replace("\r", "\\r").cow_replace("\n", "\\n").cow_replace("\t", "\\t"));
464+
}
456465
if let Some(re) = &table.columns[i].include {
457466
if !re.is_match(&table.columns[i].value.borrow()) {
458467
filtered = true;

0 commit comments

Comments
 (0)