16
16
17
17
import subprocess as sp
18
18
import os
19
+ import json
20
+ import collections
21
+
22
+ from pcapgraph .parse_options import get_tshark_status , decode_stdout
23
+ from pcapgraph .parse_options import get_packet_count
19
24
20
25
21
26
def pcap_intersector (pcap_names ):
@@ -43,7 +48,7 @@ def pcap_intersector(pcap_names):
43
48
"""
44
49
45
50
46
- def framed_pcap_intersector (pcap1 , pcap2 , has_temporal_intersection ):
51
+ def intersect_pcap (pcap1 , pcap2 , has_temporal_intersection ):
47
52
"""Create a packet capture intersection out of two files using ipids.
48
53
49
54
Let 2 packet captures have the following packets and assume that traffic
@@ -83,15 +88,14 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
83
88
Args:
84
89
pcap1 (string): Filename of packet capture 1.
85
90
pcap2 (string): Filename of packet capture 2.
91
+ has_temporal_intersection (bool): Type of intersection (other is id).
86
92
"""
87
93
# Init vars
88
94
pcap_info = {
89
95
pcap1 : [],
90
96
pcap2 : []
91
97
}
92
- # TODO replace with get_tshark_status
93
- os .environ ["PATH" ] += os .pathsep + os .pathsep .join (
94
- ["C:\\ Program Files\\ Wireshark" ])
98
+ get_tshark_status ()
95
99
96
100
# Get a list of sequential ip ids from both packet captures
97
101
for pcap in (pcap1 , pcap2 ):
@@ -104,7 +108,7 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
104
108
pcap_packet_list = pcap_output .split ('\r \n ' )[:- 1 ]
105
109
for packet in pcap_packet_list :
106
110
frame_num , ip_id = packet .split ('\t ' )
107
- # Every frame number should be present as we iterate through packets
111
+ # Every frame num should be present as we iterate through packets
108
112
pcap_info [pcap ].append (ip_id )
109
113
110
114
# Using index instead of dictionary element because in Python 3.6,
@@ -171,10 +175,60 @@ def framed_pcap_intersector(pcap1, pcap2, has_temporal_intersection):
171
175
sp .Popen (framed_pcap_cmds [pcap ])
172
176
173
177
178
def union_pcap(*pcaps):
    """Given sets A = (1, 2, 3), B = (2, 3, 4), A + B = (1, 2, 3, 4).

    About:
        Reads every pcap with get_pcap_as_json() (tshark), keeps the first
        occurrence of each distinct raw frame, converts the survivors with
        convert_to_pcaptext() and pipes that text to
        `text2pcap - union.pcap`, which writes union.pcap relative to the
        current working directory.

    Use case:
        * For a packet capture that contains a broadcast storm, this function
          will find unique packets.
        * For any other situation where you need to find all unique packets.
        * This function can be lossy with timestamps because excluding
          packets in diff pcaps with diff timestamps, but same content is
          the purpose of this function.

    Not used here, kept for reference:
        mergecap <file>... -w union.pcap
            Merges multiple pcaps and saves them as union.pcap
            (preserves timestamps).

    Args:
        *pcaps (str): Any number of pcap filenames.
    """
    raw_packet_list = []
    for pcap in pcaps:
        print(pcap)
        for packet in get_pcap_as_json(pcap):
            frame_raw = packet['_source']['layers']['frame_raw']
            # NOTE(review): newer tshark releases appear to emit frame_raw as
            # a list whose first item is the hex string; confirm against the
            # tshark version in use. A list would also be unhashable below.
            if isinstance(frame_raw, list):
                frame_raw = frame_raw[0]
            raw_packet_list.append(frame_raw)

    # Statistics cover every packet read, duplicates included.
    print("Packet statistics", collections.Counter(raw_packet_list))

    # Using the raw frame text as a dict key drops duplicate packets;
    # OrderedDict keeps first-seen order on every Python 3 version.
    unique_packets = collections.OrderedDict.fromkeys(raw_packet_list)
    pcap_text = ''.join(
        convert_to_pcaptext(packet) for packet in unique_packets)
    print(pcap_text)

    save_pcap_cmds = ['text2pcap', '-', 'union.pcap']
    save_pcap = sp.Popen(save_pcap_cmds, stdin=sp.PIPE, stdout=sp.PIPE)
    save_pcap.communicate(input=pcap_text.encode())
217
+
218
+
219
def difference_pcap(pcap1, pcap2):
    """Given sets A = (1, 2, 3), B = (2, 3, 4), A-B = (1).

    Intended approach: find the intersection using intersect_pcap() and then
    remove those packets from A, and save with tshark.

    NOTE: This function has no body beyond this docstring, so calling it
    currently does nothing and returns None.

    Args:
        pcap1: Packet capture A (presumably a filename, as in
            intersect_pcap -- confirm once implemented).
        pcap2: Packet capture B (presumably a filename, as in
            intersect_pcap -- confirm once implemented).
    """
225
+
226
+
174
227
def search_for_common_frame (frame_list1 , frame_list2 ):
175
228
"""Search for a common frame by iterating through list1 and then list2.
176
229
177
- To search the list in reverse, pass in a reversed list.
230
+ Default is to go in forward direction.
231
+ To search both lists in reverse, pass in 2 reversed lists.
178
232
179
233
Args:
180
234
frame_list1 (list): List of ip_ids from pcap1
@@ -191,4 +245,73 @@ def search_for_common_frame(frame_list1, frame_list2):
191
245
if pcap1_packet_ip_id == pcap2_packet_ip_id :
192
246
return pcap1_index + 1 , pcap2_index + 1
193
247
194
- framed_pcap_intersector ('examples/simul1.pcap' , 'examples/simul2.pcap' )
248
+
249
def get_pcap_as_json(pcap):
    """Run `tshark -r <pcap> -x -T json` and return the parsed JSON.

    Related tshark invocations, for reference:
        tshark -r <pcap> -w -
            Pipes packet capture one packet per line to stdout
        tshark -r -
            Read file from stdin
        tshark -r <in.pcap> -x | text2pcap - <out.pcap>
            Prints hex of pcap to stdout and then resaves it as a pcap. This
            WILL delete packet timestamps as that is not encoded in hex
            output.

    Args:
        pcap (string): File name.
    Returns:
        The object json.loads() builds from tshark's stdout. union_pcap()
        iterates it as a sequence of per-packet dicts.
    """
    tshark_proc = sp.Popen(
        ['tshark', '-r', pcap, '-x', '-T', 'json'], stdout=sp.PIPE)
    # communicate() returns (stdout, stderr); only stdout is piped here.
    tshark_stdout = tshark_proc.communicate()[0]
    return json.loads(tshark_stdout)
268
+
269
+
270
def convert_to_pcaptext(raw_packet):
    """Convert the raw pcap hex to a form that text2pcap can read from stdin.

    `tshark -r <file> -T json -x` produces the "in" and text2pcap
    requires the "out" formats as shown below:

    Per Text2pcap documentation:
    "Text2pcap understands a hexdump of the form generated by od -Ax -tx1 -v."

    In format (one unbroken string of hex characters, truncated here):
        247703511344881544abbfdd0800452000542bbc00007901e8fd080808080a30...

    Out format (16 bytes per line, each line prefixed by its hex offset):
        0000 24 77 03 51 13 44 88 15 44 ab bf dd 08 00 45 20
        0010 00 54 2b bc 00 00 79 01 e8 fd 08 08 08 08 0a 30
        ...

    NOTE: Output format doesn't need an extra \\n between packets. So in the
    above example, the next line could be 0000 00 ... for the next packet.

    Args:
        raw_packet (str): Hex characters of one packet, two per byte.
    Returns:
        (str): One newline-terminated line per 16 bytes of the packet; an
            empty string when raw_packet is empty.
    """
    hex_chars_per_byte = 2
    bytes_per_line = 16
    hex_chars_per_line = bytes_per_line * hex_chars_per_byte

    lines = []
    for line_start in range(0, len(raw_packet), hex_chars_per_line):
        raw_line = raw_packet[line_start:line_start + hex_chars_per_line]
        # Step over the actual line length so a short final line does not
        # pick up padding spaces.
        hex_bytes = [
            raw_line[byte_start:byte_start + hex_chars_per_byte]
            for byte_start in range(0, len(raw_line), hex_chars_per_byte)
        ]
        # Offsets are byte positions written in hexadecimal; formatting a
        # decimal multiple of ten only matches for the first ten lines.
        byte_offset = line_start // hex_chars_per_byte
        lines.append(
            '{:04x} {}\n'.format(byte_offset, ' '.join(hex_bytes)))

    return ''.join(lines)
314
+
315
+
316
# Run the example only when this file is executed as a script, so that
# importing the module does not invoke tshark/text2pcap.
if __name__ == '__main__':
    union_pcap('examples/simul1.pcap', 'examples/simul2.pcap')
317
+
0 commit comments