Skip to content
Permalink
12d11a0a27
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
110 lines (83 sloc) 3.42 KB
import json
import get_folders_native
import get_projects_native
import pandas as pd
from settings import TABLE
dataset = TABLE.split('.')[0]
table = TABLE.split('.')[1]
def get_entry(member, role, project):
parts = member.split(':')
type = parts[0]
email = parts[1]
parent = project.parent.split('/')
entry = {
'project': {"createTime": project.create_time, "lifecycleState": project.state.name, "name": project.display_name, "parent": {"id": parent[1], "type": parent[0]}, "projectId": project.project_id, "projectNumber": project.name.split("/")[1]},
'role': role,
'type': type,
'email': email
}
parent = project.parent.split('/')
if parent[0][:-1] == 'folder':
parent_id = parent[1]
folder = folders.get(parent_id)
entry['project']['parent']['folder'] = folder['folder'].display_name
entry['project']['parent']['path'] = folder['path']
return entry
def write_to_bigquery(data):
print(data)
import io
data_as_file = io.StringIO(data)
schema = []
with open('schema.json', 'r') as schemafile:
schema_json = json.loads(schemafile.read())
from google.cloud import bigquery
for field in schema_json:
schema.append(bigquery.SchemaField(field.get('name'), field.get('type'), field.get('mode'), field.get('description')))
bq_client = bigquery.Client()
table_ref = bq_client.dataset(dataset).table(table)
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = schema
load_job = bq_client.load_table_from_file(
data_as_file, table_ref, job_config=job_config
) # API request
result = load_job.result()
projects_by_user = {}
entries = []
folders = {}
df = pd.DataFrame()
folders = get_folders_native.folders
projects = get_projects_native.projects
for project_id in projects.keys():
proj = projects[project_id]['project']
try:
for binding in projects[project_id]['iam_policy'].bindings:
for member in binding.members:
entry = get_entry(member=member,
role=binding.role,
project=proj)
entry_df = pd.json_normalize(entry)
entries.append(entry_df)
#local_part = entry['email'].split('@')[0].lower()
#if not projects_by_user.get(local_part):
# projects_by_user[local_part] = []
#projects_by_user[local_part].append(entry)
except:
pass
#print(json.dumps(projects_by_user))
#df = df.append(other=entries)
df = pd.concat(entries)
# get rid of the .'s in the column names created by json_normalize
df.columns = df.columns.str.replace(r".", "_")
df.columns = df.columns.str.replace("-", "_")
# add the date of the audit so we can create a time series
df['audit_time'] = pd.Timestamp.now().isoformat()
# convert all field values to string type
df = df.astype(str)
# workaround for pandas v1.1.1, due to the fact that astype(str) will convert a np.nan to the literal string 'nan'....
# so we'll just flip it back to a none type....
df = df.replace(['nan'], [None])
#output to row delimited json
data = df.to_json(orient='records', lines=True, date_format='iso')
write_to_bigquery(data)