Skip to content
Permalink
Browse files

First write-up

  • Loading branch information
Paul Crum
Paul Crum committed Dec 11, 2017
1 parent b3ea5a1 commit c581131a1f012477ccef1d06fc3282093d87daab
Showing with 101 additions and 1 deletion.
  1. +7 −1 README.md
  2. +50 −0 create_table_example.sql
  3. +44 −0 query_example.sql
@@ -1,2 +1,8 @@
# dlt-billing-csv-processing-with-aws-athena
Example on how to process DLT csv detailed billing reports with AWS Athena
Example on how to query DLT csv detailed billing reports with AWS Athena

Contents:
----------
- create_table_example.sql: Creates a table that matches the column format of a DLT detailed billing CSV file. Because field values may contain quotes, OpenCSVSerde (http://docs.aws.amazon.com/athena/latest/ug/csv.html) must be used. As a result, all field values are imported as STRING. Field values can be converted to other data types when the data is queried.

- query_example.sql: A sample query that retrieves ten 'LineItem' rows from the table, casting the date and numeric fields from STRING to typed values.
@@ -0,0 +1,50 @@
-- External Athena table over the DLT detailed billing CSV files.
--
-- * The column list mirrors the column layout of the billing CSV, including
--   the user-defined `user:*` columns. Those names contain a colon, so they
--   are back-quoted here and must be double-quoted when queried.
-- * OpenCSVSerde is used because field values may contain quotes. It reads
--   every field as STRING; cast to other types at query time.
-- * 'skip.header.line.count'='1' skips the first line of each CSV file so the
--   column-title row is not returned as a data row.
-- * LOCATION is a template: replace the <...> placeholders. It must name a
--   folder (prefix) rather than a single file, hence the trailing slash.
CREATE EXTERNAL TABLE IF NOT EXISTS dltBillingData.utilization (
    `InvoiceID` STRING,
    `PayerAccountId` STRING,
    `LinkedAccountId` STRING,
    `RecordType` STRING,
    `RecordId` STRING,
    `ProductName` STRING,
    `RateId` STRING,
    `SubscriptionId` STRING,
    `PricingPlanId` STRING,
    `UsageType` STRING,
    `Operation` STRING,
    `AvailabilityZone` STRING,
    `ReservedInstance` STRING,
    `ItemDescription` STRING,
    `UsageStartDate` STRING,
    `UsageEndDate` STRING,
    `UsageQuantity` STRING,
    `BlendedRate` STRING,
    `BlendedCost` STRING,
    `UnBlendedRate` STRING,
    `UnBlendedCost` STRING,
    `ResourceId` STRING,
    `user:Application` STRING,
    `user:CA001` STRING,
    `user:CostCode` STRING,
    `user:CreationDate` STRING,
    `user:Creator` STRING,
    `user:Department` STRING,
    `user:DeptCode` STRING,
    `user:Environment` STRING,
    `user:Location` STRING,
    `user:Name` STRING,
    `user:Organization` STRING,
    `user:Owner` STRING,
    `user:Product` STRING,
    `user:Project` STRING,
    `user:Purpose` STRING,
    `user:ResponsibleParty` STRING,
    `user:Role` STRING,
    `user:Use` STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = ",",
    "quoteChar" = "\"",
    "escapeChar" = "\\"
)
LOCATION 's3://<aws-account-id>-dlt-utilization/<path-to-csv-files>/'
TBLPROPERTIES (
    'has_encrypted_data'='false',
    'skip.header.line.count'='1'
);
@@ -0,0 +1,44 @@
-- Sample query: return at most ten 'LineItem' records from the billing table.
-- Every column in the table is stored as STRING, so the usage dates and the
-- quantity/rate/cost fields are cast to typed values here; all other fields
-- are passed through under snake_case aliases.
SELECT
    -- Invoice, account and record identification
    InvoiceID                     AS invoice_id,
    PayerAccountId                AS payer_account_id,
    LinkedAccountId               AS linked_account_id,
    RecordType                    AS record_type,
    RecordId                      AS record_id,

    -- Product and pricing details
    ProductName                   AS product_name,
    RateId                        AS rate_id,
    SubscriptionId                AS subscription_id,
    PricingPlanId                 AS pricing_plan_id,
    UsageType                     AS usage_type,
    Operation                     AS operation,
    AvailabilityZone              AS availability_zone,
    ReservedInstance              AS reserved_instance,
    ItemDescription               AS item_description,

    -- Usage period, converted from STRING
    CAST(UsageStartDate AS TIMESTAMP) AS usage_start_date,
    CAST(UsageEndDate AS TIMESTAMP)   AS usage_end_date,

    -- Quantity, rates and costs, converted from STRING
    CAST(UsageQuantity AS DOUBLE) AS dbl_usagequantity,
    CAST(BlendedRate AS DOUBLE)   AS blended_rate,
    CAST(BlendedCost AS DOUBLE)   AS blended_cost,
    CAST(UnBlendedRate AS DOUBLE) AS unblended_rate,
    CAST(UnBlendedCost AS DOUBLE) AS unblended_cost,

    ResourceId                    AS resource_id,

    -- User-defined columns; the colon in each name requires double quotes
    "user:Application"            AS user_defined_application,
    "user:CA001"                  AS user_defined_ca001,
    "user:CostCode"               AS user_defined_costcode,
    "user:CreationDate"           AS user_defined_creation_date,
    "user:Creator"                AS user_defined_creator,
    "user:Department"             AS user_defined_dept,
    "user:DeptCode"               AS user_defined_dept_code,
    "user:Environment"            AS user_defined_env,
    "user:Location"               AS user_defined_loc,
    "user:Name"                   AS user_defined_name,
    "user:Organization"           AS user_defined_org,
    "user:Owner"                  AS user_defined_owner,
    "user:Product"                AS user_defined_product,
    "user:Project"                AS user_defined_project,
    "user:Purpose"                AS user_defined_purpose,
    "user:ResponsibleParty"       AS user_defined_responsible_party,
    "user:Role"                   AS user_defined_role,
    "user:Use"                    AS user_defined_use
FROM "dltbillingdata"."utilization"
WHERE
    -- Drop any row whose InvoiceID holds the literal column title, i.e. a CSV
    -- header line that was loaded as a data row.
    invoiceid <> 'InvoiceID'
    AND recordtype = 'LineItem'
LIMIT 10;

0 comments on commit c581131

Please sign in to comment.
You can’t perform that action at this time.