AWS Athena support for microseconds in the TIMESTAMP data type

0

Hi, I'd appreciate AWS Athena support for TIMESTAMP data type with microsecond precision for all row formats and table engines. Currently, the support is very inconsistent. See the SQL script below.

drop table if exists test_csv;
create external table if not exists test_csv (
        id int,
    created_time timestamp
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties('separatorChar'=',', 'quoteChar'='"', 'escapeChar'='\\')
location 's3://my-bucket/tmp/timestamp_csv_test/';

-- result: OK

drop table if exists test_parquet;
create external table if not exists test_parquet (
    id int,
    created_time timestamp
)
row format serde 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
stored as inputformat 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
location 's3://my-bucket/tmp/timestamp_parquet_test/'
tblproperties ('parquet.compress' = 'snappy');

-- result: OK

drop table if exists test_iceberg;
create table if not exists test_iceberg (
    id int,
    created_time timestamp
)
location 's3://my-bucket/tmp/timestamp_iceberg_test/'
tblproperties ( 'table_type' ='iceberg');

-- result: OK

insert into test_csv values (1, timestamp '2023-03-22 11:00:00.123456');

/*
result: ERROR [HY000][100071] [Simba][AthenaJDBC](100071) An error has been thrown from the AWS Athena client. GENERIC_INTERNAL_ERROR: class org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector cannot be cast to class org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector and org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector are in unnamed module of loader io.trino.server.PluginClassLoader @1df1bd44). If a data manifest file was generated at 's3://my-bucket/athena_results/ad44adee-2a80-4f41-906a-17aa5dc27730-manifest.csv', you may need to manually clean the data from locations specified in the manifest. Athena will not delete data in your account. [Execution ID: ***]
*/

insert into test_parquet values (1, timestamp '2023-03-22 11:00:00.123456');

-- result: OK

select * from test_parquet;

-- result: OK DATA: 1,2023-03-22 11:00:00.123000 BUT THE TIMESTAMP VALUE IS TRUNCATED TO MILLISECONDS!

insert into test_iceberg values (1, timestamp '2023-03-22 11:00:00.123456');

-- result: OK

select * from test_csv;

select * from test_iceberg;

-- result: OK DATA:  1,2023-03-22 11:00:00.123456 THIS IS FINE

asked a year ago133 views
No Answers

You are not logged in. Log in to post an answer.

A good answer clearly answers the question and provides constructive feedback and encourages professional growth in the question asker.

Guidelines for Answering Questions