@@ -28,7 +28,6 @@ use arrow_array::RecordBatch;
28
28
use arrow_schema:: { DataType , Field , Schema , TimeUnit } ;
29
29
use chrono:: { DateTime , Utc } ;
30
30
use serde:: { Deserialize , Serialize } ;
31
- use serde_json:: Value ;
32
31
33
32
use crate :: {
34
33
metadata:: SchemaVersion ,
@@ -40,19 +39,6 @@ use super::{Event, DEFAULT_TIMESTAMP_KEY};
40
39
41
40
pub mod json;
42
41
43
- static TIME_FIELD_NAME_PARTS : [ & str ; 11 ] = [
44
- "time" ,
45
- "date" ,
46
- "timestamp" ,
47
- "created" ,
48
- "received" ,
49
- "ingested" ,
50
- "collected" ,
51
- "start" ,
52
- "end" ,
53
- "ts" ,
54
- "dt" ,
55
- ] ;
56
42
type EventSchema = Vec < Arc < Field > > ;
57
43
58
44
/// Source of the logs, used to perform special processing for certain sources
@@ -167,8 +153,7 @@ pub trait EventFormat: Sized {
167
153
if !Self :: is_schema_matching ( new_schema. clone ( ) , storage_schema, static_schema_flag) {
168
154
return Err ( anyhow ! ( "Schema mismatch" ) ) ;
169
155
}
170
- new_schema =
171
- update_field_type_in_schema ( new_schema, None , time_partition, None , schema_version) ;
156
+ new_schema = update_field_type_in_schema ( new_schema, None , time_partition) ;
172
157
173
158
let rb = Self :: decode ( data, new_schema. clone ( ) ) ?;
174
159
@@ -269,8 +254,6 @@ pub fn update_field_type_in_schema(
269
254
inferred_schema : Arc < Schema > ,
270
255
existing_schema : Option < & HashMap < String , Arc < Field > > > ,
271
256
time_partition : Option < & String > ,
272
- log_records : Option < & Vec < Value > > ,
273
- schema_version : SchemaVersion ,
274
257
) -> Arc < Schema > {
275
258
let mut updated_schema = inferred_schema. clone ( ) ;
276
259
let existing_field_names = get_existing_field_names ( inferred_schema. clone ( ) , existing_schema) ;
@@ -280,13 +263,6 @@ pub fn update_field_type_in_schema(
280
263
updated_schema = override_existing_timestamp_fields ( existing_schema, updated_schema) ;
281
264
}
282
265
283
- if let Some ( log_records) = log_records {
284
- for log_record in log_records {
285
- updated_schema =
286
- override_data_type ( updated_schema. clone ( ) , log_record. clone ( ) , schema_version) ;
287
- }
288
- }
289
-
290
266
let Some ( time_partition) = time_partition else {
291
267
return updated_schema;
292
268
} ;
@@ -309,51 +285,3 @@ pub fn update_field_type_in_schema(
309
285
. collect ( ) ;
310
286
Arc :: new ( Schema :: new ( new_schema) )
311
287
}
312
-
313
- // From Schema v1 onwards, convert JSON fields whose name contains "date"/"time" (or another entry of
314
- // TIME_FIELD_NAME_PARTS) and whose string value parses as a timestamp to timestamp type, and all numbers to float64.
315
- pub fn override_data_type (
316
- inferred_schema : Arc < Schema > ,
317
- log_record : Value ,
318
- schema_version : SchemaVersion ,
319
- ) -> Arc < Schema > {
320
- let Value :: Object ( map) = log_record else {
321
- return inferred_schema;
322
- } ;
323
- let updated_schema: Vec < Field > = inferred_schema
324
- . fields ( )
325
- . iter ( )
326
- . map ( |field| {
327
- let field_name = field. name ( ) . as_str ( ) ;
328
- match ( schema_version, map. get ( field. name ( ) ) ) {
329
- // In V1, a field whose lowercased name contains one of TIME_FIELD_NAME_PARTS, whose inferred
330
- // type is Utf8, and whose value in this record parses as a timestamp gets the timestamp type.
331
- // NOTE: only RFC 3339 and RFC 2822 strings are recognised here; more datetime formats could be supported.
332
- ( SchemaVersion :: V1 , Some ( Value :: String ( s) ) )
333
- if TIME_FIELD_NAME_PARTS
334
- . iter ( )
335
- . any ( |part| field_name. to_lowercase ( ) . contains ( part) )
336
- && field. data_type ( ) == & DataType :: Utf8
337
- && ( DateTime :: parse_from_rfc3339 ( s) . is_ok ( )
338
- || DateTime :: parse_from_rfc2822 ( s) . is_ok ( ) ) =>
339
- {
340
- // Rebuild the field as a millisecond-precision Timestamp without a timezone
341
- Field :: new (
342
- field_name,
343
- DataType :: Timestamp ( TimeUnit :: Millisecond , None ) ,
344
- true ,
345
- )
346
- }
347
- // In V1, a field with a numeric inferred type and a JSON number value in this record is cast to float64.
348
- ( SchemaVersion :: V1 , Some ( Value :: Number ( _) ) ) if field. data_type ( ) . is_numeric ( ) => {
349
- // Rebuild the field with data type Float64
350
- Field :: new ( field_name, DataType :: Float64 , true )
351
- }
352
- // No override applies: keep the name and inferred data type (the field is still rebuilt with the flag set to true)
353
- _ => Field :: new ( field_name, field. data_type ( ) . clone ( ) , true ) ,
354
- }
355
- } )
356
- . collect ( ) ;
357
-
358
- Arc :: new ( Schema :: new ( updated_schema) )
359
- }
0 commit comments