Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
gcp-gce-project-audit-bq/owner_report_native.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
110 lines (83 sloc)
3.42 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import get_folders_native | |
import get_projects_native | |
import pandas as pd | |
from settings import TABLE | |
dataset = TABLE.split('.')[0] | |
table = TABLE.split('.')[1] | |
def get_entry(member, role, project): | |
parts = member.split(':') | |
type = parts[0] | |
email = parts[1] | |
parent = project.parent.split('/') | |
entry = { | |
'project': {"createTime": project.create_time, "lifecycleState": project.state.name, "name": project.display_name, "parent": {"id": parent[1], "type": parent[0]}, "projectId": project.project_id, "projectNumber": project.name.split("/")[1]}, | |
'role': role, | |
'type': type, | |
'email': email | |
} | |
parent = project.parent.split('/') | |
if parent[0][:-1] == 'folder': | |
parent_id = parent[1] | |
folder = folders.get(parent_id) | |
entry['project']['parent']['folder'] = folder['folder'].display_name | |
entry['project']['parent']['path'] = folder['path'] | |
return entry | |
def write_to_bigquery(data): | |
print(data) | |
import io | |
data_as_file = io.StringIO(data) | |
schema = [] | |
with open('schema.json', 'r') as schemafile: | |
schema_json = json.loads(schemafile.read()) | |
from google.cloud import bigquery | |
for field in schema_json: | |
schema.append(bigquery.SchemaField(field.get('name'), field.get('type'), field.get('mode'), field.get('description'))) | |
bq_client = bigquery.Client() | |
table_ref = bq_client.dataset(dataset).table(table) | |
job_config = bigquery.LoadJobConfig() | |
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE | |
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON | |
job_config.schema = schema | |
load_job = bq_client.load_table_from_file( | |
data_as_file, table_ref, job_config=job_config | |
) # API request | |
result = load_job.result() | |
projects_by_user = {} | |
entries = [] | |
folders = {} | |
df = pd.DataFrame() | |
folders = get_folders_native.folders | |
projects = get_projects_native.projects | |
for project_id in projects.keys(): | |
proj = projects[project_id]['project'] | |
try: | |
for binding in projects[project_id]['iam_policy'].bindings: | |
for member in binding.members: | |
entry = get_entry(member=member, | |
role=binding.role, | |
project=proj) | |
entry_df = pd.json_normalize(entry) | |
entries.append(entry_df) | |
#local_part = entry['email'].split('@')[0].lower() | |
#if not projects_by_user.get(local_part): | |
# projects_by_user[local_part] = [] | |
#projects_by_user[local_part].append(entry) | |
except: | |
pass | |
#print(json.dumps(projects_by_user)) | |
#df = df.append(other=entries) | |
df = pd.concat(entries) | |
# get rid of the .'s in the column names created by json_normalize | |
df.columns = df.columns.str.replace(r".", "_") | |
df.columns = df.columns.str.replace("-", "_") | |
# add the date of the audit so we can create a time series | |
df['audit_time'] = pd.Timestamp.now().isoformat() | |
# convert all field values to string type | |
df = df.astype(str) | |
# workaround for pandas v1.1.1, due to the fact that astype(str) will convert a np.nan to the literal string 'nan'.... | |
# so we'll just flip it back to a none type.... | |
df = df.replace(['nan'], [None]) | |
#output to row delimited json | |
data = df.to_json(orient='records', lines=True, date_format='iso') | |
write_to_bigquery(data) |