Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
df-scripts/flowdump.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
114 lines (94 sloc)
4.04 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse
import contextlib
import csv
import os
import shutil
import sys
import tempfile
import urllib2

import ijson
from tabulate import tabulate
def loadapikey(key='deepfield.key'):
    """Load api_key from env or file

    The DEEPFIELD_API_KEY environment variable takes precedence. When it is
    unset or blank, the first line of the key file is used instead.

    Args:
        key: path of the fallback key file. Defaults to deepfield.key

    Returns:
        api_key: the key with surrounding whitespace removed

    Raises:
        IOError: the environment variable is unusable and the key file
            cannot be opened
    """
    # A blank variable (e.g. `export DEEPFIELD_API_KEY=`) is treated as unset
    # so that an empty key is never silently sent to the API.
    api_key = os.environ.get('DEEPFIELD_API_KEY', '').strip()
    if api_key:
        return api_key
    with open(key) as f:
        # Only the first line is the key; anything after it is ignored.
        return f.readline().strip()
def loadip(myfile='ip.csv'):
    """Load the IPs listed in a file, one per line, into a list.

    Surrounding whitespace is removed from every line and blank lines are
    skipped, so a trailing newline or an empty separator line never turns
    into an empty "IP" that would be queried.

    Args:
        myfile: path of the file to read. Defaults to ip.csv

    Returns:
        ips: list of non-empty strings in file order
    """
    with open(myfile) as f:
        stripped = (line.strip() for line in f)
        return [ip for ip in stripped if ip]
def flowquery(ips, timestamp='-15min:', api_key=None):
    """Returns columns, data tuple

    Queries Deepfield flowdump context for all flows related to ips passed in
    via list. Every IP is queried twice, once as source address and once as
    destination address, and the rows of all responses are concatenated.
    Currently returns all fields and defaults to last 15 minutes

    Args:
        ips: list of IP addresses
        timestamp: begin[:end], defaults to -15min:
        api_key: Deepfield API key. When None (the default) the key is
            loaded with loadapikey() at call time, so merely importing this
            module no longer requires a key to be available.

    Returns:
        columns: list of header names (dimensions followed by measures),
            taken from the meta_data of the first response
        data: nested list of results, one inner list per flow row
    """
    if api_key is None:
        api_key = loadapikey()
    columns = []
    data = []
    for ip in ips:
        ip_local = str(ip)
        for flow in ('src', 'dst'):
            url = 'https://internet2-2.deepfield.net/cube/flowdump.json?api_key={0}&d=timestamp,addr.src,port.src,addr.dst,port.dst,protocol,tcpflags,interfaces.input,interfaces.output&s=timestamp({1})&a=timestep(auto)&slice=addr.{2}({3})'.format(api_key, timestamp, flow, ip_local)
            print('Querying data for {0} ({1}), timestamp = {2}'.format(ip_local, flow, timestamp))
            # Spool the response into an anonymous temporary file: ijson is
            # a streaming parser and the document has to be scanned up to
            # three times. The file is private to this call and removed on
            # close, unlike a fixed name in the working directory.
            with tempfile.TemporaryFile() as spool:
                # closing() because the Python 2 response object is not a
                # context manager.
                with contextlib.closing(urllib2.urlopen(url)) as page:
                    shutil.copyfileobj(page, spool)
                if not columns:
                    # Header names are read once, from the first response.
                    spool.seek(0)
                    columns = list(ijson.items(spool, 'meta_data.dimensions.item'))
                    spool.seek(0)
                    columns = columns + list(ijson.items(spool, 'meta_data.measures.item'))
                spool.seek(0)
                width = len(columns)
                # Assumes each cube item is a list ordered like the columns.
                # Cells are picked by position, so repeated column names
                # cannot alias each other.
                for row in ijson.items(spool, 'cube.item'):
                    data.append([row[position] for position in range(width)])
    return columns, data
def flowwrite(columns, data, myfile='output.csv'):
    """Writes columns, data to a csv file

    The header row is written first, followed by one row per entry in data,
    using the csv 'excel' dialect.

    Args:
        columns: column header list
        data: data list (one inner sequence per row)
        myfile: name of output file. defaults to output.csv

    Returns:
        None. Writes the csv file, replacing any existing file.
    """
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; either way the writer controls the line
    # terminator itself.
    if sys.version_info[0] < 3:
        f = open(myfile, 'wb')
    else:
        f = open(myfile, 'w', newline='')
    with f:
        wr = csv.writer(f, dialect='excel')
        wr.writerow(columns)
        wr.writerows(data)
def main(args):
    """Called when running flowdump.py as a script

    Parses the command line, collects the IPs to look up (a single -i value
    or the lines of the -r file), runs one flow query and either writes the
    result to the -w file or prints it to stdout as a table.

    Args:
        args: full argument vector in sys.argv form, i.e. the program name
            followed by the options. When empty or None, sys.argv is used.
    """
    parser = argparse.ArgumentParser(description='Query the Deepfield flowdump context for all flows related to given IP address(es). Prints to stdout if no write argument is given.', epilog='Example: flowdump.py -i 192.0.2.2 -t 2017-01-01T12-00:2017-01-01T12-15 -w output.csv')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-i','--ip', help='IP address to search for')
    group.add_argument('-r','--read', help='Read IPs from file. One IP per line')
    parser.add_argument('-t','--timestamp', help='timestamp. If using a relative timestamp, call as -t=-15min:', default='-15min:')
    parser.add_argument('-w','--write', help='Write results to file')
    # Parse the vector that was passed in (minus the program name) instead
    # of always re-reading sys.argv, so the parameter is actually used.
    options = parser.parse_args(args[1:] if args else None)
    # The group is required and mutually exclusive: exactly one of -i / -r
    # is set at this point.
    if options.ip is not None:
        ips = [options.ip]
    else:
        ips = loadip(options.read)
    columns, data = flowquery(ips, options.timestamp)
    if options.write is not None:
        flowwrite(columns, data, options.write)
    else:
        print(tabulate(data, headers=columns))
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)