Source code for radical.analytics.experiment


from .session import Session


# ------------------------------------------------------------------------------
#
[docs]class Experiment(object):

    # --------------------------------------------------------------------------
    #
[docs]    def __init__(self, sources, stype):
        '''
        This class represents an RCT experiment, i.e., a series of RA sessions
        which are collectively analyzed.

        `sources` is expected to be a list of tuples of session source paths
        pointing to tarballs or session directories.  The order of tuples in the
        list determines the default order used in plots etc.

        The session type `stype` will be uniformely applied to all sessions.
        '''

        # FIXME: this is missing an abstraction: `Run`: a collection of sessions
        #         which share the same parameters and thus can be statistically
        #         handled together (means, std-deviation, etc).  Right now we
        #         only use this `Experiment` abstraction for non-statistical
        #         analysis (event plots, utilization plots, etc.)

        self._sessions = list()

        for src in sources:
            self._sessions.append(Session.create(src=src, stype=stype))


    # --------------------------------------------------------------------------
    #
    @property
    def sessions(self):
        return self._sessions

    @property
    def sids(self):
        return [s.sid for s in self._sessions]


    # --------------------------------------------------------------------------
    #
[docs]    def utilization(self, metrics, rtype='cpu', udurations=None):
        '''
        return five dictionaries:
          - provided resources
          - consumed resources
          - absolute stats
          - relative stats
          - information about resource utilization

        The resource dictionaries have the following structures::

            provided = {
                <session_id> : {
                    'metric_1' : {
                        'uid_1'        : [float, list],
                        'uid_2'        : [float, list],
                        ...
                    },
                    'metric_2' : {
                        'uid_1'        : [float, list],
                        'uid_2'        : [float, list],
                        ...
                    },
                    ...
                },
                ...
            }
            consumed = {
                <session_id> : {
                    'metric_1' : {
                        'uid_1'         : [float, list]
                        'uid_2'         : [float, list],
                        ...
                    },
                    'metric_2' :         {
                        'uid_1'         : [float, list],
                        'uid_2'         : [float, list],
                        ...
                    },
                    ...
                },
                ...
            }

        `float` is always in tasks of `resource * time`, (think `core-hours`),
        `list` is a list of 4-tuples `[t0, t1, r0, r1]` which signify at what
        specific time interval (`t0 to t1`) what specific resources (`r0 to r1`)
        have been used.  The task of the resources are here dependent on the
        session type: only RP sessions are supported at the moment where those
        resource values are indexes in to the list of cores used in that
        specific session (offset over multiple pilots, if needed).
        '''

        # FIXME: the data structure documented above is not yet implemented

        provided  = dict()
        consumed  = dict()
        stats_abs = dict()
        stats_rel = dict()
        info      = dict()

        # obtain resources provisions and consumptions for all sessions
        for session in self._sessions:

            sid = session.uid
            p, c, sa, sr, i = session.utilization(metrics, rtype, udurations)

            provided [sid] = p
            consumed [sid] = c
            stats_abs[sid] = sa
            stats_rel[sid] = sr
            info     [sid] = i

        return provided, consumed, stats_abs, stats_rel, info


    # --------------------------------------------------------------------------
    #
    def _dump_ts(self, dname, e, spec, psize=0):

        pass
      # ts   = e.timestamps(event=spec)
      # diff = ts[-1] - ts[0]
      # print '%-10s : %-55s : %3d : %10.1f - %10.1f = %10.1f -> %10.1f' \
      #     % (dname, spec, len(ts), ts[-1], ts[0], diff, diff * psize)


# ------------------------------------------------------------------------------