Skip to content

Commit 1738be3

Browse files
committed
Union half-works now
Union will do what mergecap does (without mergecap). Next step is to remove redundant frames to just unique ones.
1 parent 179c680 commit 1738be3

File tree

4 files changed

+140
-42
lines changed

4 files changed

+140
-42
lines changed

gateway.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515
"""A journey of a thousand miles begins with a hop through your gateway.
1616
17-
This file exists to
17+
Provides an entry point for pcapgraph.
1818
"""
1919
from pcapgraph.pcapgraph import run
2020

pcapgraph/pcap_math.py renamed to pcap_algebra/pcap_math.py

Lines changed: 130 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616

1717
import subprocess as sp
1818
import os
19+
import json
20+
import collections
21+
22+
from pcapgraph.parse_options import get_tshark_status, decode_stdout
23+
from pcapgraph.parse_options import get_packet_count
1924

2025

2126
def pcap_intersector(pcap_names):
@@ -43,7 +48,7 @@ def pcap_intersector(pcap_names):
4348
"""
4449

4550

46-
def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
51+
def intersect_pcap(pcap1, pcap2, has_temporal_intersection):
4752
"""Create a packet capture intersection out of two files using ipids.
4853
4954
Let 2 packet captures have the following packets and assume that traffic
@@ -83,15 +88,14 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
8388
Args:
8489
pcap1 (string): Filename of packet capture 1.
8590
pcap2 (string): Filename of packet capture 2.
91+
has_temporal_intersection (bool): Type of intersection (other is id).
8692
"""
8793
# Init vars
8894
pcap_info = {
8995
pcap1: [],
9096
pcap2: []
9197
}
92-
# TODO replace with get_tshark_status
93-
os.environ["PATH"] += os.pathsep + os.pathsep.join(
94-
["C:\\Program Files\\Wireshark"])
98+
get_tshark_status()
9599

96100
# Get a list of sequential ip ids from both packet captures
97101
for pcap in (pcap1, pcap2):
@@ -104,7 +108,7 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
104108
pcap_packet_list = pcap_output.split('\r\n')[:-1]
105109
for packet in pcap_packet_list:
106110
frame_num, ip_id = packet.split('\t')
107-
# Every frame number should be present as we iterate through packets
111+
# Every frame num should be present as we iterate through packets
108112
pcap_info[pcap].append(ip_id)
109113

110114
# Using index instead of dictionary element because in Python 3.6,
@@ -171,10 +175,60 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
171175
sp.Popen(framed_pcap_cmds[pcap])
172176

173177

178+
def union_pcap(*pcaps):
    """Given sets A = (1, 2, 3), B = (2, 3, 4), A + B = (1, 2, 3, 4).

    About:
        This function uses tshark (via get_pcap_as_json) to read the raw
        hex of every frame in every pcap, keeps one copy of each distinct
        frame, and pipes the result through text2pcap to write
        'union.pcap' in the current working directory.

    Use case:
        * For a packet capture that contains a broadcast storm, this function
          will find unique packets.
        * For any other situation where you need to find all unique packets.
        * This function is lossy with timestamps: packets in different
          pcaps with different timestamps but the same content are treated
          as the same packet, and the hex text fed to text2pcap carries no
          timestamp information.

    Args:
        *pcaps (str): One or more pcap filenames.
    """
    raw_packet_list = []
    for pcap in pcaps:
        print(pcap)
        for packet in get_pcap_as_json(pcap):
            raw_packet_list.append(packet['_source']['layers']['frame_raw'])

    # The counter shows how many times each frame occurred across all inputs.
    print("Packet statistics", collections.Counter(raw_packet_list))

    # Using the raw frame hex as a dict key drops duplicate packets; an
    # OrderedDict keeps first-seen order regardless of Python version.
    unique_packets = collections.OrderedDict.fromkeys(raw_packet_list)
    pcap_text = ''.join(
        convert_to_pcaptext(packet) for packet in unique_packets)

    # text2pcap reads the hexdump from stdin ('-') and writes union.pcap.
    save_pcap_cmds = ['text2pcap', '-', 'union.pcap']
    save_pcap = sp.Popen(save_pcap_cmds, stdin=sp.PIPE, stdout=sp.PIPE)
    save_pcap.communicate(input=pcap_text.encode())
217+
218+
219+
def difference_pcap(pcap1, pcap2):
    """Given sets A = (1, 2, 3), B = (2, 3, 4), A-B = (1).

    This method will find the intersection using intersect_pcap() and then
    remove those packets from A, and save with tshark.

    NOTE: This is a placeholder. The body currently consists of this
    docstring only, so calling the function does nothing and returns None.

    Args:
        pcap1: Presumably the filename of packet capture A (the minuend);
            unused until the function is implemented.
        pcap2: Presumably the filename of packet capture B (the
            subtrahend); unused until the function is implemented.
    """
225+
226+
174227
def search_for_common_frame(frame_list1, frame_list2):
175228
"""Search for a common frame by iterating through list1 and then list2.
176229
177-
To search the list in reverse, pass in a reversed list.
230+
Default is to go in forward direction.
231+
To search both lists in reverse, pass in 2 reversed lists.
178232
179233
Args:
180234
frame_list1 (list): List of ip_ids from pcap1
@@ -191,4 +245,73 @@ def search_for_common_frame(frame_list1, frame_list2):
191245
if pcap1_packet_ip_id == pcap2_packet_ip_id:
192246
return pcap1_index + 1, pcap2_index + 1
193247

194-
framed_pcap_intersector('examples/simul1.pcap', 'examples/simul2.pcap')
248+
249+
def get_pcap_as_json(pcap):
    """Run `tshark -r <pcap> -x -T json` and return the parsed JSON.

    Related tshark invocations, kept here for reference:
        tshark -r <pcap> -w -
            Pipes packet capture one packet per line to stdout
        tshark -r -
            Read file from stdin
        tshark -r <in.pcap> -x | text2pcap - <out.pcap>
            Prints hex of pcap to stdout and then resaves it as a pcap. This
            WILL delete packet timestamps as that is not encoded in hex
            output.

    Args:
        pcap (string): File name.
    Returns:
        The object produced by json.loads() on tshark's stdout. union_pcap()
        iterates over it as a sequence of per-packet dicts, so it is
        presumably a list rather than a single dict.
    """
    tshark_process = sp.Popen(
        ['tshark', '-r', pcap, '-x', '-T', 'json'],
        stdout=sp.PIPE)
    # communicate() waits for tshark to exit; only stdout is captured.
    tshark_stdout = tshark_process.communicate()[0]
    parsed_pcap = json.loads(tshark_stdout)
    return parsed_pcap
268+
269+
270+
def convert_to_pcaptext(raw_packet):
    """Convert raw packet hex to a hexdump that text2pcap can read on stdin.

    `tshark -r <file> -T json -x` produces the "in" format and text2pcap
    requires the "out" format, as shown below.

    Per text2pcap documentation:
    "Text2pcap understands a hexdump of the form generated by od -Ax -tx1 -v."

    In format (one continuous string of hex characters):
        247703511344881544abbfdd0800452000542bbc00007901e8fd...

    Out format (hex byte offset, then up to 16 space-separated bytes):
        0000 24 77 03 51 13 44 88 15 44 ab bf dd 08 00 45 20
        0010 00 54 2b bc 00 00 79 01 e8 fd ...

    NOTE: Output format doesn't need an extra \\n between packets; a line
    starting with offset 0000 begins the next packet.

    Args:
        raw_packet (string): Hex characters of one packet, two per byte.
    Returns:
        (string): One newline-terminated line per 16 bytes of the packet.
            An empty input yields an empty string.
    """
    hex_chars_per_byte = 2
    bytes_per_line = 16
    hex_chars_per_line = bytes_per_line * hex_chars_per_byte

    lines = []
    for line_start in range(0, len(raw_packet), hex_chars_per_line):
        raw_line = raw_packet[line_start:line_start + hex_chars_per_line]
        # Step over the actual line length so that a short final line does
        # not get padded with trailing spaces.
        hex_bytes = [
            raw_line[byte_start:byte_start + hex_chars_per_byte]
            for byte_start in range(0, len(raw_line), hex_chars_per_byte)
        ]
        # The offset is the byte position of the line and must be written in
        # hex: the 11th line is 00a0, not 0100.
        byte_offset = line_start // hex_chars_per_byte
        lines.append('{:04x} {}\n'.format(byte_offset, ' '.join(hex_bytes)))

    return ''.join(lines)
314+
315+
316+
union_pcap('examples/simul1.pcap', 'examples/simul2.pcap')
317+

pcapgraph/parse_options.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ def get_pcap_dict(filenames, has_compare_pcaps, verbosity, is_anon):
164164
filenames (list): A list of filepaths.
165165
has_compare_pcaps (bool): Whether the user has provided the '-c' option.
166166
verbosity (bool): Whether to provide user with additional context.
167-
packet_intersect (bool): Whether to output packet intersection pcap.
168167
is_anon (bool): Whether to anonymize packet capture names.
169168
Return:
170169
(dict): A dict with all of the data that graph functions need.
@@ -268,14 +267,14 @@ def get_pcap_vars(filename):
268267
pcap_end = float(decode_stdout(pcap_end_raw))
269268

270269
tcpdump_release_time = 946684800
271-
if pcap_start < tcpdump_release_time or pcap_end < tcpdump_release_time:
272-
print("!!! Packets from ", filename, " must have traveled via"
273-
" a flux capacitor because they're in the past or the future!"
270+
if pcap_start < tcpdump_release_time or \
271+
pcap_end < tcpdump_release_time:
272+
print("!!! Packets from ", filename, " must have traveled via "
273+
"a flux capacitor because they're in the past or the future!"
274274
"\n!!! Timestamps predate the release of tcpdump or "
275275
"are negative.\n!!! Excluding from results.\n")
276276
return 0, 0, 0
277277

278-
print(packet_count, pcap_start, pcap_end)
279278
return packet_count, pcap_start, pcap_end
280279

281280
# (else) May need to raise an exception for this as it means input is bad.
@@ -321,37 +320,12 @@ def get_pcap_similarity(pivot_pcap, other_pcap, verbosity):
321320
# Iterate over all packets with the given frame number.
322321
pcap_starttime = time.time()
323322
print("--compare percent similar starting for", other_pcap + "... ")
324-
tshark_filters = [
325-
'-2', '-Y', 'ip', '-T', 'fields', '-e', 'ip.id', '-e', 'ip.src', '-e',
326-
'ip.dst', '-e', 'tcp.ack', '-e', 'tcp.seq', '-e', 'udp.srcport'
327-
]
328-
pivot_raw_output = \
329-
sp.Popen(['tshark', '-r', pivot_pcap, *tshark_filters],
330-
stdout=sp.PIPE, stderr=sp.PIPE)
331-
pivot_pkts = set(decode_stdout(pivot_raw_output).split('\n'))
332-
other_raw_output = \
333-
sp.Popen(['tshark', '-r', other_pcap, *tshark_filters],
334-
stdout=sp.PIPE, stderr=sp.PIPE)
335-
other_pkts = set(decode_stdout(other_raw_output).split('\n'))
336-
total_count = len(pivot_pkts)
337-
# Use python's set functions to find the fastest intersection of packets.
338-
same_pkts = set(pivot_pkts).intersection(other_pkts)
339-
similarity_count = len(same_pkts)
323+
324+
325+
340326
percent_same = round(100 * (similarity_count / total_count))
341327

342328
if verbosity:
343329
print("\tand it took", time.time() - pcap_starttime, 'seconds.')
344330

345331
return percent_same
346-
347-
348-
def save_pcap_intersection(filenames):
349-
"""Save the pcap intersection as a pcap.
350-
351-
Given that the intersection returns a packet capture
352-
"""
353-
pivot = filenames[0]
354-
pivot_raw_output = \
355-
sp.Popen(['tshark', '-r', pivot_pcap, *tshark_filters],
356-
stdout=sp.PIPE, stderr=sp.PIPE)
357-
pivot_pkts = set(decode_stdout(pivot_raw_output).split('\n'))

pcapgraph/pcapgraph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"""PcapGraph
1616
1717
Usage:
18-
pcapgraph [-acV] [-i | --output <format>] (--dir <dir>... | <file>...)
18+
pcapgraph [-acV] [--output <format>] (--dir <dir>... | <file>...)
19+
pcapgraph (diff | union | intersect) (--dir <dir>... | <file>...)
1920
pcapgraph (-g | --generate-pcaps) [--int <interface>]
2021
pcapgraph (-h | --help)
2122
pcapgraph (-v | --version)

0 commit comments

Comments
 (0)