#!/usr/bin/python

# Python module for parsing Linux perf_event data files
#
# Copyright (C) 2009 Anton Blanchard <anton@au.ibm.com>, IBM
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version
# 2 of the License, or (at your option) any later version.


import re
import sys
import struct


class event_type:
	"""Record type codes, compared against the ``type`` field of an event header."""

	MMAP = 1
	LOST = 2
	COMM = 3
	EXIT = 4
	THROTTLE = 5
	UNTHROTTLE = 6	# decoded with the same layout as THROTTLE
	FORK = 7
	READ = 8	# no decoder for this record type in this module
	SAMPLE = 9
	MUNMAP = 10	# decoded with the same layout as MMAP


class sample_format:
	"""Bit masks tested against an attribute's ``sample_type`` field.

	Each set bit means the corresponding optional field is present in a
	sample record.
	"""

	IP = 0x001
	TID = 0x002
	TIME = 0x004
	ADDR = 0x008
	READ = 0x010
	CALLCHAIN = 0x020
	ID = 0x040
	CPU = 0x080
	PERIOD = 0x100
	STREAM_ID = 0x200
	RAW = 0x400


class file_section(object):
	"""An (offset, size) pair locating a section inside the perf file.

	Attributes set by the constructor:
	  offset      -- first unsigned 64-bit value of the descriptor
	  sectionsize -- second unsigned 64-bit value of the descriptor
	"""

	_format = 'Q Q'
	size = struct.calcsize(_format)

	def __init__(self, buf):
		"""Decode the descriptor from the first ``size`` bytes of *buf*.

		Raises struct.error if *buf* holds fewer than ``size`` bytes.
		"""
		raw = buf[0:self.size]
		self.offset, self.sectionsize = struct.unpack(self._format, raw)


class file_attr(object):
	"""One entry of the attrs section: event attributes plus an ids section.

	The first ``_base_size`` bytes hold the attribute fields; they are
	followed immediately by a file_section descriptor stored in ``ids``.
	``size`` is the total length of one entry.
	"""

	# The two leading fields and the wakeup/reserved pair are 32-bit values.
	# 'I' is used rather than 'L' because 'L' is a native unsigned long,
	# which is 8 bytes on 64-bit Linux and would inflate the layout from
	# 64 to 80 bytes and misplace every later field.
	_format = 'I I Q Q Q Q Q I I Q'
	_base_size = struct.calcsize(_format)
	size = _base_size + file_section.size

	def __init__(self, buf):
		"""Decode one attr entry from the start of *buf*.

		Raises struct.error if *buf* is shorter than ``size`` bytes.
		"""
		(self.type, self.checksize, self.config, self.sample_period,
		 self.sample_type, self.read_format, self.features,
		 self.wakeup_events, self.reserved_2, self.reserved_3) = \
			struct.unpack(self._format, buf[0:self._base_size])

		# The ids section descriptor sits right after the fixed fields.
		self.ids = file_section(buf[self._base_size:self.size])

	def validate(self):
		"""Raise TypeError if the recorded attr size differs from ours."""
		if self._base_size != self.checksize:
			raise TypeError("Invalid perf file attr")


class event_header(object):
	"""Fixed-size header that precedes every event record.

	Attributes set by the constructor:
	  type -- record type code (see event_type)
	  misc -- second header field; not interpreted by this module
	  size -- record length in bytes, header included
	"""

	format = 'I H H'
	header_size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the header; *buf* must be exactly ``header_size`` bytes."""
		fields = struct.unpack(self.format, buf)
		self.type, self.misc, self.size = fields


class mmap_event(object):
	"""Payload of an MMAP (or MUNMAP) record.

	Attributes set by the constructor:
	  size     -- length of the payload in bytes
	  pid, tid -- two unsigned 32-bit values
	  addr, len, pgoff -- three unsigned 64-bit values
	  filename -- remaining bytes with trailing NUL padding removed
	              (a byte string; it is not decoded to text)
	"""

	format = 'I I Q Q Q'
	_base_size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload *buf* (the record without its event header).

		Raises struct.error if *buf* is shorter than the fixed fields.
		"""
		self.size = len(buf)
		# Whatever follows the fixed fields is the NUL-padded file name.
		slen = self.size - self._base_size
		self.format = '%s %ds' % (self.format, slen)

		(self.pid, self.tid, self.addr, self.len, self.pgoff,
		 self.filename) = struct.unpack(self.format, buf)

		# Strip with a bytes argument: struct yields bytes on Python 3,
		# where rstrip('\0') would raise TypeError. On Python 2 b'\0' is
		# the same str as before.
		self.filename = self.filename.rstrip(b'\0')


class lost_event(object):
	"""Payload of a LOST record: two unsigned 64-bit values, ``id`` and ``lost``."""

	format = 'Q Q'
	size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload; *buf* must be exactly ``size`` bytes."""
		values = struct.unpack(self.format, buf)
		self.id, self.lost = values


class comm_event(object):
	"""Payload of a COMM record.

	Attributes set by the constructor:
	  size     -- length of the payload in bytes
	  pid, tid -- two unsigned 32-bit values
	  comm     -- remaining bytes with trailing NUL padding removed
	              (a byte string; it is not decoded to text)
	"""

	format = 'I I'
	_base_size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload *buf* (the record without its event header).

		Raises struct.error if *buf* is shorter than the fixed fields.
		"""
		self.size = len(buf)
		# Whatever follows the fixed fields is the NUL-padded name.
		slen = self.size - self._base_size
		self.format = '%s %ds' % (self.format, slen)

		(self.pid, self.tid, self.comm) = \
			struct.unpack(self.format, buf)

		# Strip with a bytes argument: struct yields bytes on Python 3,
		# where rstrip('\0') would raise TypeError. On Python 2 b'\0' is
		# the same str as before.
		self.comm = self.comm.rstrip(b'\0')


class exit_event(object):
	"""Payload of an EXIT record.

	Holds four unsigned 32-bit values (pid, ppid, tid, ptid) followed by
	an unsigned 64-bit ``time``.
	"""

	format = 'I I I I Q'
	size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload; *buf* must be exactly ``size`` bytes."""
		values = struct.unpack(self.format, buf)
		self.pid, self.ppid, self.tid, self.ptid, self.time = values


class throttle_event(object):
	"""Payload of a THROTTLE or UNTHROTTLE record.

	Holds three unsigned 64-bit values: ``time``, ``id`` and ``stream_id``.
	"""

	format = 'Q Q Q'
	size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload; *buf* must be exactly ``size`` bytes."""
		values = struct.unpack(self.format, buf)
		self.time, self.id, self.stream_id = values


class fork_event(object):
	"""Payload of a FORK record.

	Holds four unsigned 32-bit values (pid, ppid, tid, ptid) followed by
	an unsigned 64-bit ``time``.
	"""

	format = 'I I I I Q'
	size = struct.calcsize(format)

	def __init__(self, buf):
		"""Decode the payload; *buf* must be exactly ``size`` bytes."""
		values = struct.unpack(self.format, buf)
		self.pid, self.ppid, self.tid, self.ptid, self.time = values


class sample_event(object):
	"""Payload of a SAMPLE record, decoded according to a sample_format mask.

	Only the attributes whose bit is set in the mask are created on the
	instance: ip, (pid, tid), time, addr, id, stream_id, (cpu, res),
	period and callchain.
	"""

	def __init__(self, buf, format):
		"""Decode *buf* using the sample_format bits in *format*.

		Raises struct.error if *buf* is too short for the selected fields.
		"""
		# Fixed-width optional fields in the order they are read; each one
		# occupies 8 bytes of the payload.
		fixed_fields = (
			(sample_format.IP, 'Q', ('ip',)),
			(sample_format.TID, 'I I', ('pid', 'tid')),
			(sample_format.TIME, 'Q', ('time',)),
			(sample_format.ADDR, 'Q', ('addr',)),
			(sample_format.ID, 'Q', ('id',)),
			(sample_format.STREAM_ID, 'Q', ('stream_id',)),
			(sample_format.CPU, 'I I', ('cpu', 'res')),
			(sample_format.PERIOD, 'Q', ('period',)),
		)

		pos = 0
		for bit, fmt, names in fixed_fields:
			if not format & bit:
				continue
			values = struct.unpack(fmt, buf[pos:pos + 8])
			for name, value in zip(names, values):
				setattr(self, name, value)
			pos += 8

		# FIXME: sample_format.READ is not decoded; nothing is consumed
		# for it here.

		if format & sample_format.CALLCHAIN:
			# A 64-bit entry count followed by that many 64-bit entries.
			(depth,) = struct.unpack('Q', buf[pos:pos + 8])
			pos += 8
			chain_bytes = buf[pos:pos + (8 * depth)]
			self.callchain = struct.unpack('%sQ' % depth, chain_bytes)


class header(object):
	"""Top-level header of a perf data file.

	Attributes set by the constructor:
	  data        -- file_section locating the event data
	  event_types -- file_section read from the third descriptor
	  attrs       -- list of file_attr entries read from the attrs section

	``size`` is the length of the header: three 64-bit fields followed by
	three file_section descriptors.
	"""

	_format = 'Q Q Q'
	_base_size = struct.calcsize(_format)
	size = _base_size + 3 * file_section.size

	# Reads as the ASCII bytes "PERFFILE" when stored little-endian.
	_PERF_MAGIC = 0x454c494646524550

	def __init__(self, buf):
		"""Decode the header and every attr entry from the file image *buf*.

		Raises TypeError if the magic number does not match, and
		struct.error if *buf* is too short.
		"""
		(self._magic, self._checksize, self._attr_checksize) = \
			struct.unpack(self._format, buf[0:self._base_size])

		if self._magic != self._PERF_MAGIC:
			raise TypeError("Invalid perf header")

		# Three consecutive section descriptors follow the fixed fields.
		sections = []
		pos = self._base_size
		for _ in range(3):
			sections.append(file_section(buf[pos:pos + file_section.size]))
			pos += file_section.size
		(self._attrs, self.data, self.event_types) = sections

		# Floor division keeps the count an int on Python 3 as well.
		nattrs = self._attrs.sectionsize // file_attr.size
		first = self._attrs.offset
		self.attrs = []
		for i in range(nattrs):
			start = first + i * file_attr.size
			self.attrs.append(file_attr(buf[start:start + file_attr.size]))

	def validate(self):
		"""Raise TypeError if the magic or any recorded size is unexpected."""
		if self._magic != self._PERF_MAGIC or \
		   self._checksize != self.size or \
		   self._attr_checksize != file_attr.size:
			raise TypeError("Invalid perf header")

		for attr in self.attrs:
			attr.validate()


class perf_event(object):
	"""Iterator-style reader over the event records of a perf data file.

	Typical use: call first_event(), then inspect ``event_header`` and
	``event`` and call next_event() until ``event`` is False.

	Attributes:
	  buf          -- the whole file contents
	  header       -- the decoded and validated file header
	  start, end   -- byte range of the data section inside ``buf``
	  current      -- offset of the record currently decoded
	  event_header -- event_header of the current record
	  event        -- decoded payload of the current record, or False once
	                  the end of the data section has been reached
	"""

	def __init__(self, filename):
		"""Read *filename* into memory and decode and validate its header.

		Raises TypeError if the header is invalid.
		"""
		# FIXME: Not a good idea with large profiles
		# 'with' closes the file even if read() raises.
		with open(filename, 'rb') as f:
			self.buf = f.read()

		self.header = header(self.buf)
		self.header.validate()

		self.start = self.header.data.offset
		self.end = self.start + self.header.data.sectionsize

	def __get_event(self):
		"""Decode the record at ``self.current`` into event_header/event.

		For record types without a decoder, ``event_header`` is updated
		but ``event`` is left untouched.

		Raises TypeError if the record declares a size smaller than its
		own header.
		"""
		hdr_start = self.current
		hdr_end = hdr_start + event_header.header_size
		self.event_header = event_header(self.buf[hdr_start:hdr_end])

		# A record shorter than its header can only come from a corrupt
		# file; without this check next_event() would stop advancing
		# (size 0) or walk the data misaligned.
		if self.event_header.size < event_header.header_size:
			raise TypeError("Invalid perf event size")

		eventbuf = self.buf[hdr_end:hdr_start + self.event_header.size]
		etype = self.event_header.type

		if etype in (event_type.MMAP, event_type.MUNMAP):
			self.event = mmap_event(eventbuf)
		elif etype == event_type.LOST:
			self.event = lost_event(eventbuf)
		elif etype == event_type.COMM:
			self.event = comm_event(eventbuf)
		elif etype == event_type.EXIT:
			self.event = exit_event(eventbuf)
		elif etype in (event_type.THROTTLE, event_type.UNTHROTTLE):
			self.event = throttle_event(eventbuf)
		elif etype == event_type.FORK:
			self.event = fork_event(eventbuf)
		elif etype == event_type.SAMPLE:
			# FIXME: If sampling multiple events we have an issue
			# here. Since the ID is not the first optional field
			# it might be impossible to differentiate between
			# events since the ID field would be at different
			# offsets. For now we assume all events use the same
			# set of optional fields.
			eventnr = 0
			self.event = sample_event(eventbuf,
					self.header.attrs[eventnr].sample_type)

	def first_event(self):
		"""Position on the first record, or set ``event`` to False if the
		data section is empty."""
		self.current = self.start
		if self.current >= self.end:
			self.event = False
		else:
			self.__get_event()

	def next_event(self):
		"""Advance to the following record; ``event`` becomes False at the
		end of the data section."""
		self.current += self.event_header.size
		if self.current >= self.end:
			self.event = False
		else:
			self.__get_event()
