AWS Athena support for microseconds in the TIMESTAMP data type

0

Hi, I'd appreciate AWS Athena support for TIMESTAMP data type with microsecond precision for all row formats and table engines. Currently, the support is very inconsistent. See the SQL script below.

drop table if exists test_csv;
create external table if not exists test_csv (
        id int,
    created_time timestamp
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties('separatorChar'=',', 'quoteChar'='"', 'escapeChar'='\\')
location 's3://my-bucket/tmp/timestamp_csv_test/';

-- result: OK

drop table if exists test_parquet;
create external table if not exists test_parquet (
    id int,
    created_time timestamp
)
row format serde 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
stored as inputformat 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
location 's3://my-bucket/tmp/timestamp_parquet_test/'
tblproperties ('parquet.compress' = 'snappy');

-- result: OK

drop table if exists test_iceberg;
create table if not exists test_iceberg (
    id int,
    created_time timestamp
)
location 's3://my-bucket/tmp/timestamp_iceberg_test/'
tblproperties ( 'table_type' ='iceberg');

-- result: OK

insert into test_csv values (1, timestamp '2023-03-22 11:00:00.123456');

/*
result: ERROR [HY000][100071] [Simba][AthenaJDBC](100071) An error has been thrown from the AWS Athena client. GENERIC_INTERNAL_ERROR: class org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector cannot be cast to class org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector and org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector are in unnamed module of loader io.trino.server.PluginClassLoader @1df1bd44). If a data manifest file was generated at 's3://my-bucket/athena_results/ad44adee-2a80-4f41-906a-17aa5dc27730-manifest.csv', you may need to manually clean the data from locations specified in the manifest. Athena will not delete data in your account. [Execution ID: ***]
*/

insert into test_parquet values (1, timestamp '2023-03-22 11:00:00.123456');

-- result: OK

select * from test_parquet;

-- result: OK DATA: 1,2023-03-22 11:00:00.123000 BUT THE TIMESTAMP VALUE IS TRUNCATED TO MILLISECONDS!

insert into test_iceberg values (1, timestamp '2023-03-22 11:00:00.123456');

-- result: OK

select * from test_csv;

select * from test_iceberg;

-- result: OK DATA:  1,2023-03-22 11:00:00.123456 THIS IS FINE

已提问 1 年前138 查看次数
没有答案

您未登录。 登录 发布回答。

一个好的回答可以清楚地解答问题和提供建设性反馈,并能促进提问者的职业发展。

回答问题的准则