Permalink
Showing
with
101 additions
and 1 deletion.
- +7 −1 README.md
- +50 −0 create_table_example.sql
- +44 −0 query_example.sql
@@ -1,2 +1,8 @@ | ||
# dlt-billing-csv-processing-with-aws-athena | ||
Example on how to process DLT csv detailed billing reports with AWS Athena | ||
An example of how to query DLT detailed billing CSV reports with AWS Athena.
|
||
Contents: | ||
---------- | ||
- create_table_example.sql: Creates a table that matches the column format of a DLT detailed billing CSV file. Because field values may contain quotes, OpenCSVSerde (http://docs.aws.amazon.com/athena/latest/ug/csv.html) must be used. As a result, all field values are imported as STRING; they can be converted to other data types at query time.
|
||
- query_example.sql: A sample query to retrieve the first ten 'LineItem' rows found in the database. |
@@ -0,0 +1,50 @@ | ||
-- External Athena table over the DLT detailed billing CSV files.
--
-- OpenCSVSerde is used so that quoted field values are parsed correctly; with
-- this SerDe every column is declared STRING and is converted to other data
-- types at query time.
--
-- 'skip.header.line.count' = '1' drops the first line of each CSV file so the
-- header row is not returned as a data row.
-- NOTE(review): assumes every file under LOCATION starts with exactly one
-- header row -- confirm against the delivered reports.
--
-- Replace the <aws-account-id> and <path-to-csv-files> placeholders in
-- LOCATION before running.
CREATE EXTERNAL TABLE IF NOT EXISTS dltBillingData.utilization (
    `InvoiceID` STRING,
    `PayerAccountId` STRING,
    `LinkedAccountId` STRING,
    `RecordType` STRING,
    `RecordId` STRING,
    `ProductName` STRING,
    `RateId` STRING,
    `SubscriptionId` STRING,
    `PricingPlanId` STRING,
    `UsageType` STRING,
    `Operation` STRING,
    `AvailabilityZone` STRING,
    `ReservedInstance` STRING,
    `ItemDescription` STRING,
    `UsageStartDate` STRING,
    `UsageEndDate` STRING,
    `UsageQuantity` STRING,
    `BlendedRate` STRING,
    `BlendedCost` STRING,
    `UnBlendedRate` STRING,
    `UnBlendedCost` STRING,
    `ResourceId` STRING,
    `user:Application` STRING,
    `user:CA001` STRING,
    `user:CostCode` STRING,
    `user:CreationDate` STRING,
    `user:Creator` STRING,
    `user:Department` STRING,
    `user:DeptCode` STRING,
    `user:Environment` STRING,
    `user:Location` STRING,
    `user:Name` STRING,
    `user:Organization` STRING,
    `user:Owner` STRING,
    `user:Product` STRING,
    `user:Project` STRING,
    `user:Purpose` STRING,
    `user:ResponsibleParty` STRING,
    `user:Role` STRING,
    `user:Use` STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = ",",
    "quoteChar" = "\"",
    "escapeChar" = "\\"
)
LOCATION 's3://<aws-account-id>-dlt-utilization/<path-to-csv-files>'
TBLPROPERTIES (
    'has_encrypted_data' = 'false',
    'skip.header.line.count' = '1'
);
@@ -0,0 +1,44 @@ | ||
-- Sample query: returns ten 'LineItem' rows from the billing table, renaming
-- every column to snake_case and converting the date and numeric fields
-- (stored as STRING in the table) to TIMESTAMP and DOUBLE.
SELECT
    -- Invoice and account identifiers
    InvoiceID                          AS invoice_id,
    PayerAccountId                     AS payer_account_id,
    LinkedAccountId                    AS linked_account_id,
    RecordType                         AS record_type,
    RecordId                           AS record_id,
    -- Product and pricing details
    ProductName                        AS product_name,
    RateId                             AS rate_id,
    SubscriptionId                     AS subscription_id,
    PricingPlanId                      AS pricing_plan_id,
    UsageType                          AS usage_type,
    Operation                          AS operation,
    AvailabilityZone                   AS availability_zone,
    ReservedInstance                   AS reserved_instance,
    ItemDescription                    AS item_description,
    -- Usage period, quantity, rates and costs converted from STRING
    CAST(UsageStartDate AS TIMESTAMP)  AS usage_start_date,
    CAST(UsageEndDate AS TIMESTAMP)    AS usage_end_date,
    CAST(UsageQuantity AS DOUBLE)      AS dbl_usagequantity,
    CAST(BlendedRate AS DOUBLE)        AS blended_rate,
    CAST(BlendedCost AS DOUBLE)        AS blended_cost,
    CAST(UnBlendedRate AS DOUBLE)      AS unblended_rate,
    CAST(UnBlendedCost AS DOUBLE)      AS unblended_cost,
    ResourceId                         AS resource_id,
    -- "user:"-prefixed columns; double-quoted because the names contain a colon
    "user:Application"                 AS user_defined_application,
    "user:CA001"                       AS user_defined_ca001,
    "user:CostCode"                    AS user_defined_costcode,
    "user:CreationDate"                AS user_defined_creation_date,
    "user:Creator"                     AS user_defined_creator,
    "user:Department"                  AS user_defined_dept,
    "user:DeptCode"                    AS user_defined_dept_code,
    "user:Environment"                 AS user_defined_env,
    "user:Location"                    AS user_defined_loc,
    "user:Name"                        AS user_defined_name,
    "user:Organization"                AS user_defined_org,
    "user:Owner"                       AS user_defined_owner,
    "user:Product"                     AS user_defined_product,
    "user:Project"                     AS user_defined_project,
    "user:Purpose"                     AS user_defined_purpose,
    "user:ResponsibleParty"            AS user_defined_responsible_party,
    "user:Role"                        AS user_defined_role,
    "user:Use"                         AS user_defined_use
FROM "dltbillingdata"."utilization"
WHERE
    -- Presumably excludes CSV header rows ingested as data (a header row would
    -- carry the literal column name in this field) -- verify against the files.
    InvoiceID <> 'InvoiceID'
    AND RecordType = 'LineItem'
-- No ORDER BY: which ten matching rows are returned is not guaranteed.
LIMIT 10;