From fdca5668748ddda5c0f743e4cd2e83ce1a1d654c Mon Sep 17 00:00:00 2001 From: Gabriel Geise Date: Wed, 6 Oct 2021 19:02:41 -0400 Subject: [PATCH] Commit change to native functions and wrapping for google cloud function --- .gitignore | 4 +- cloudbuild.yaml | 17 +++++ get_folders.py | 88 -------------------------- get_folders_native.py | 81 ++++++++++++++++++++++++ get_projects.py | 53 ---------------- get_projects_native.py | 42 ++++++++++++ main.py | 5 ++ owner_report.py | 78 ----------------------- owner_report_native.py | 109 ++++++++++++++++++++++++++++++++ requirements.txt | 3 + run_audit.sh | 20 ------ settings.default => settings.py | 9 ++- 12 files changed, 266 insertions(+), 243 deletions(-) create mode 100644 cloudbuild.yaml delete mode 100644 get_folders.py create mode 100644 get_folders_native.py delete mode 100644 get_projects.py create mode 100644 get_projects_native.py create mode 100644 main.py delete mode 100644 owner_report.py create mode 100644 owner_report_native.py create mode 100644 requirements.txt delete mode 100644 run_audit.sh rename settings.default => settings.py (62%) diff --git a/.gitignore b/.gitignore index fad93b8..73b1bf9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.json !schema.json -settings.py \ No newline at end of file +gcp/** +__pycache__ +.DS_Store diff --git a/cloudbuild.yaml b/cloudbuild.yaml new file mode 100644 index 0000000..9dc69e8 --- /dev/null +++ b/cloudbuild.yaml @@ -0,0 +1,17 @@ +steps: +- name: 'gcr.io/cloud-builders/gcloud' + waitFor: ['-'] + id: gcp-role-audit + args: + - functions + - deploy + - gcp-role-audit + - --region=us-central1 + - --runtime=python37 + - --memory=256MB + - --source=. + - --trigger-http + - --service-account=gcp-role-audit@up-eit-ce-production.iam.gserviceaccount.com + - --timeout=540 + - --entry-point=main_http + - --set-env-vars=ORGANIZATION_ID=521000005136,APPS_SCRIPT_FOLDER_ID=folders/678208053052,TABLE=UserAudit.gcp_audit \ No newline at end of file diff --git a/get_folders.py b/get_folders.py deleted file mode 100644 index 05b533d..0000000 --- a/get_folders.py +++ /dev/null @@ -1,88 +0,0 @@ -import subprocess -import json -from settings import ORGANIZATION_ID - -FOLDER_LIST_CMD = 'gcloud resource-manager folders list --format json' -FOLDER_IAM_CMD = 'gcloud resource-manager folders get-iam-policy --format json' - -def get_reverse_path(folder_id, path): - displayName = folders[folder_id]['displayName'] - parent = folders[folder_id]['parent'] - - parts = parent.split('/') - type = parts[0] - parent_id = parts[1] - if type == 'folders': - return get_reverse_path(folder_id=parent_id, - path=(path + '/' + displayName)) - else: - return path + '/' + displayName - return parent_id - - -def get_proper_path(reverse_path): - parts = reverse_path.split('/') - parts.reverse() - - path = '' - for part in parts: - if part: - path = path + '/' + part - - return path - - -def get_path(folder_id): - displayName = folders[folder_id]['displayName'] - parent = folders[folder_id]['parent'] - - parts = parent.split('/') - type = parts[0] - parent_id = parts[1] - if type != 'folders': - return '/' + displayName - - parent_path = folders[parent_id].get('path') - if parent_path: - return parent_path + '/' + displayName - - reverse_path = get_reverse_path(folder_id=folder_id, path='') - return get_proper_path(reverse_path=reverse_path) - - -def get_folders(folder_id, - type_flag='--folder'): - - list_cmd = FOLDER_LIST_CMD + ' ' + type_flag + '=' + str(folder_id) - folder_list = json.loads(subprocess.check_output(list_cmd, - shell=True, - stderr=subprocess.STDOUT)) - - for folder in folder_list: - name = folder['name'] - id = name.split('/')[1] - iam_cmd = FOLDER_IAM_CMD + ' ' + id - folder['perm'] = json.loads(subprocess.check_output(iam_cmd, - shell=True, - stderr=subprocess.STDOUT)) - folders[id] = folder - get_folders(folder_id=id) - - return folders - - -def add_paths(): - for folder_id in folders: - folder = folders[folder_id] - # reverse_path = get_reverse_path(folder_id=folder_id, path='') - folder['path'] = get_path(folder_id=folder_id) - - -folders = {} -folders = get_folders(folder_id=ORGANIZATION_ID, - type_flag='--organization') - -add_paths() - -with open('folders.json', 'w') as outfile: - json.dump(folders, outfile, ensure_ascii=False) diff --git a/get_folders_native.py b/get_folders_native.py new file mode 100644 index 0000000..a543fb0 --- /dev/null +++ b/get_folders_native.py @@ -0,0 +1,81 @@ +import json +import base64 +import google.auth +from google.cloud import resourcemanager_v3 + +from settings import ORGANIZATION_ID + +credentials, project_id = google.auth.default() + +def get_reverse_path(folder_id, path): + displayName = folders[folder_id]['folder'].display_name + parent = folders[folder_id]['folder'].parent + + parts = parent.split('/') + type = parts[0] + parent_id = parts[1] + if type == 'folders': + return get_reverse_path(folder_id=parent_id, + path=(path + '/' + displayName)) + else: + return path + '/' + displayName + return parent_id + + +def get_proper_path(reverse_path): + parts = reverse_path.split('/') + parts.reverse() + + path = '' + for part in parts: + if part: + path = path + '/' + part + + return path + + +def get_path(folder_id): + displayName = folders[folder_id]['folder'].display_name + parent = folders[folder_id]['folder'].parent + + parts = parent.split('/') + type = parts[0] + parent_id = parts[1] + if type != 'folders': + return '/' + displayName + + parent_path = folders[parent_id]['path'] + if parent_path: + return parent_path + '/' + displayName + + reverse_path = get_reverse_path(folder_id=folder_id, path='') + return get_proper_path(reverse_path=reverse_path) + + +def get_folders(folder_id, type_flag='folder'): + client = resourcemanager_v3.FoldersClient(credentials=credentials) + if (type_flag == 'organization'): + folder_list=client.list_folders(parent=f'organizations/{folder_id}').folders + else: + folder_list=client.list_folders(parent=f'folders/{folder_id}').folders + + for folder in folder_list: + name = folder.name + id = name.split('/')[1] + policy = client.get_iam_policy(resource = name) + folders[id] = {'folder': folder, 'perm': policy} + get_folders(folder_id=id) + + return folders + +def add_paths(): + for folder_id in folders: + + #reverse_path = get_reverse_path(folder_id=folder_id, path='') + folders[folder_id]['path'] = get_path(folder_id=folder_id) + +folders = {} +folders = get_folders(folder_id=ORGANIZATION_ID, type_flag="organization") + +add_paths() + diff --git a/get_projects.py b/get_projects.py deleted file mode 100644 index cdbe24b..0000000 --- a/get_projects.py +++ /dev/null @@ -1,53 +0,0 @@ -import subprocess -import json -from settings import APPS_SCRIPT_FOLDER_ID, EXCLUDED_PROJECTS - -IAM_POLICY_CMD = 'gcloud projects get-iam-policy --format json' - -def init_projects(): - projects = {} - project_list_cmd = 'gcloud projects list --format json' - - print('initializing projects') - proj_list = json.loads(subprocess.check_output(project_list_cmd, - shell=True, - stderr=subprocess.STDOUT)) - for proj in proj_list: - project_id = proj['projectId'] - if proj['parent']['id'] != APPS_SCRIPT_FOLDER_ID: - projects[project_id] = { - 'project': proj - } - return projects - - -def write_projects(projects): - with open('projects.json', 'w') as outfile: - json.dump(projects, outfile, ensure_ascii=False) - - -def read_projects(): - try: - with open('projects.json', 'r') as jsonfile: - return json.load(jsonfile) - except Exception: - projects = init_projects() - write_projects(projects=projects) - return projects - - -def get_iam_policy(project_id): - if project_id not in EXCLUDED_PROJECTS: - get_iam_policy_cmd = IAM_POLICY_CMD + ' ' + project_id - return json.loads(subprocess.check_output(get_iam_policy_cmd, - shell=True, - stderr=subprocess.STDOUT)) - - -projects = read_projects() -for project_id in projects: - proj = projects[project_id] - if not proj.get('iam_policy'): - print('reading iam_policy for ' + project_id) - proj['iam_policy'] = get_iam_policy(project_id=project_id) - write_projects(projects=projects) diff --git a/get_projects_native.py b/get_projects_native.py new file mode 100644 index 0000000..9651812 --- /dev/null +++ b/get_projects_native.py @@ -0,0 +1,42 @@ + +import get_folders_native +import google.auth +from google.cloud import resourcemanager_v3 + +credentials, project_id = google.auth.default() +client = resourcemanager_v3.ProjectsClient(credentials=credentials) + +from settings import APPS_SCRIPT_FOLDER_ID, EXCLUDED_PROJECTS, ORGANIZATION_ID + + +def init_projects(): + projects = {} + for id, folder in get_folders_native.folders.items(): + proj_list = client.list_projects(parent=folder['folder'].name).projects + + for proj in proj_list: + project_id = proj.project_id + if proj.parent != APPS_SCRIPT_FOLDER_ID: + projects[project_id] = { + 'project': proj + } + return projects + + + +def get_iam_policy(project_id): + if project_id not in EXCLUDED_PROJECTS: + policy = client.get_iam_policy(resource=project_id) + return policy + #get_iam_policy_cmd = IAM_POLICY_CMD + ' ' + project_id + #return json.loads(subprocess.check_output(get_iam_policy_cmd, + # shell=True, + # stderr=subprocess.STDOUT)) + + +projects = init_projects() + +for project_id in projects: + if 'iam_policy' not in projects[project_id].keys(): + projects[project_id]['iam_policy'] = get_iam_policy(project_id=projects[project_id]['project'].name) + #write_projects(projects=projects) diff --git a/main.py b/main.py new file mode 100644 index 0000000..1a3a890 --- /dev/null +++ b/main.py @@ -0,0 +1,5 @@ +def main_http(request): + import owner_report_native + +if __name__ == '__main__': + main_http(None) \ No newline at end of file diff --git a/owner_report.py b/owner_report.py deleted file mode 100644 index 5394ed7..0000000 --- a/owner_report.py +++ /dev/null @@ -1,78 +0,0 @@ -import json -import time -import pandas as pd - -def get_entry(member, role, project): - parts = member.split(':') - type = parts[0] - email = parts[1] - - entry = { - 'project': project, - 'role': role, - 'type': type, - 'email': email - } - - if project['parent']['type'] == 'folder': - parent_id = project['parent']['id'] - folder = folders.get(parent_id) - entry['project']['parent']['folder'] = folder['displayName'] - entry['project']['parent']['path'] = folder['path'] - - return entry - - -projects_by_user = {} -entries = [] -folders = {} - -df = pd.DataFrame() - -with open('folders.json', 'r') as jsonfile: - folders = json.load(jsonfile) - -with open('projects.json', 'r') as jsonfile: - projects = json.load(jsonfile) - - for project_id in projects.keys(): - proj = projects[project_id] - - try: - for binding in proj['iam_policy']['bindings']: - for member in binding['members']: - entry = get_entry(member=member, - role=binding['role'], - project=proj['project']) - - - entry_df = pd.json_normalize(entry) - entries.append(entry_df) - - #local_part = entry['email'].split('@')[0].lower() - #if not projects_by_user.get(local_part): - # projects_by_user[local_part] = [] - - #projects_by_user[local_part].append(entry) - except: - pass - -#print(json.dumps(projects_by_user)) -df = df.append(other=entries) - -# get rid of the .'s in the column names created by json_normalize -df.columns = df.columns.str.replace(r".", "_") -df.columns = df.columns.str.replace("-", "_") - -# add the date of the audit so we can create a time series -df['audit_time'] = pd.Timestamp.now().isoformat() - -# convert all field values to string type -df = df.astype(str) - -# workaround for pandas v1.1.1, due to the fact that astype(str) will convert a np.nan to the literal string 'nan'.... -# so we'll just flip it back to a none type.... -df = df.replace(['nan'], [None]) - -#output to row delimited json -df.to_json(path_or_buf='owners_nldj.json',orient='records', lines=True, date_format='iso') \ No newline at end of file diff --git a/owner_report_native.py b/owner_report_native.py new file mode 100644 index 0000000..b0389b1 --- /dev/null +++ b/owner_report_native.py @@ -0,0 +1,109 @@ +import json +import get_folders_native +import get_projects_native +import pandas as pd +from settings import TABLE + +dataset = TABLE.split('.')[0] +table = TABLE.split('.')[1] +def get_entry(member, role, project): + parts = member.split(':') + type = parts[0] + email = parts[1] + parent = project.parent.split('/') + + entry = { + 'project': {"createTime": project.create_time, "lifecycleState": project.state.name, "name": project.display_name, "parent": {"id": parent[1], "type": parent[0]}, "projectId": project.project_id, "projectNumber": project.name.split("/")[1]}, + 'role': role, + 'type': type, + 'email': email + } + parent = project.parent.split('/') + + if parent[0][:-1] == 'folder': + parent_id = parent[1] + folder = folders.get(parent_id) + entry['project']['parent']['folder'] = folder['folder'].display_name + entry['project']['parent']['path'] = folder['path'] + + return entry + +def write_to_bigquery(data): + print(data) + import io + data_as_file = io.StringIO(data) + schema = [] + with open('schema.json', 'r') as schemafile: + schema_json = json.loads(schemafile.read()) + from google.cloud import bigquery + for field in schema_json: + schema.append(bigquery.SchemaField(field.get('name'), field.get('type'), field.get('mode'), field.get('description'))) + + bq_client = bigquery.Client() + table_ref = bq_client.dataset(dataset).table(table) + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + load_job = bq_client.load_table_from_file( + data_as_file, table_ref, job_config=job_config + ) # API request + + result = load_job.result() + + +projects_by_user = {} +entries = [] +folders = {} + +df = pd.DataFrame() + + +folders = get_folders_native.folders + + +projects = get_projects_native.projects + + +for project_id in projects.keys(): + proj = projects[project_id]['project'] + try: + for binding in projects[project_id]['iam_policy'].bindings: + for member in binding.members: + entry = get_entry(member=member, + role=binding.role, + project=proj) + + entry_df = pd.json_normalize(entry) + entries.append(entry_df) + + #local_part = entry['email'].split('@')[0].lower() + #if not projects_by_user.get(local_part): + # projects_by_user[local_part] = [] + + #projects_by_user[local_part].append(entry) + except: + pass + + +#print(json.dumps(projects_by_user)) +df = df.append(other=entries) + +# get rid of the .'s in the column names created by json_normalize +df.columns = df.columns.str.replace(r".", "_") +df.columns = df.columns.str.replace("-", "_") + +# add the date of the audit so we can create a time series +df['audit_time'] = pd.Timestamp.now().isoformat() + +# convert all field values to string type +df = df.astype(str) + +# workaround for pandas v1.1.1, due to the fact that astype(str) will convert a np.nan to the literal string 'nan'.... +# so we'll just flip it back to a none type.... +df = df.replace(['nan'], [None]) + +#output to row delimited json +data = df.to_json(orient='records', lines=True, date_format='iso') + +write_to_bigquery(data) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d240b17 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pandas +google-cloud-bigquery +google-cloud-resource-manager \ No newline at end of file diff --git a/run_audit.sh b/run_audit.sh deleted file mode 100644 index 8757356..0000000 --- a/run_audit.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# get the current directory -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -cd $DIR - -# table URI in project:dataset.table format -TABLE=":." - -# remove previous run data files -rm folders.json projects.json owners_nldj.json - -# run the audit files -python3 get_folders.py -python3 get_projects.py -python3 owner_report.py - -# load into BQ table -bq load --source_format=NEWLINE_DELIMITED_JSON "$TABLE" owners_nldj.json schema.json \ No newline at end of file diff --git a/settings.default b/settings.py similarity index 62% rename from settings.default rename to settings.py index 1177fa2..ad12845 100644 --- a/settings.default +++ b/settings.py @@ -1,11 +1,14 @@ +import os # the org id for the domain # found at Google Cloud Platform -> [Select Domain] -> IAM & Admin -> Settings -ORGANIZATION_ID = '' +ORGANIZATION_ID = os.environ['ORGANIZATION_ID'] # apps script project id. # create a new app script project at script.google.com -APPS_SCRIPT_FOLDER_ID = '' +APPS_SCRIPT_FOLDER_ID = os.environ['APPS_SCRIPT_FOLDER_ID'] # projects to exclude in the audit # comma separated, ex ['project_id_2','project_id_2'] -EXCLUDED_PROJECTS = [] \ No newline at end of file +EXCLUDED_PROJECTS = [] + +TABLE = os.environ['TABLE'] \ No newline at end of file