Source code for econlearn.tile

# Authors:  Neal Hughes <neal.hughes@anu.edu.au>

from __future__ import division
import numpy as np
from time import time
import sys
import pylab
from tilecode import Tilecode
from tilecode import buildgrid 

[docs]class TilecodeSamplegrid:

    """
    Construct a sample grid (sample of approximately equidistant points) from 
    a  large data set, using a tilecoding data structure

    Parameters
    -----------

    D : int,
        Number of input dimensions

    L : int,
        Number of tilings or 'layers'

    mem_max : float, optional (default = 1)
        Tile array size, values less than 1 turn on hashing

    cores : int, optional (default=1)
        Number of CPU cores to use (fitting stage is parallelized)

    offset : {'optimal', 'random', 'uniform'}, optional
        Type of displacement vector used

    Examples
    --------

    See also
    --------

    Notes
    -----

    This is an approximate method: it is possible that the resulting sample will contain
    some points less than ``radius`` distance apart. The accuracy improves with the number 
    of layers ``L``.

    Currently the tile widths are defined as ``int((b - a)  / radius)**-1``, so small changes in 
    radius may have no effect.

    """
 
    def __init__(self, D, L, mem_max=1, cores=1, offset='optimal'):

        if D == 1 and offset == 'optimal':
            offset = 'uniform'
        
        self.D = D
        self.L = L
        self.mem_max = mem_max
        self.cores = cores
        self.offset= offset
        
[docs]    def fit(self, X, radius, prop=1):
        
        """
        Fit a density function to X and return a sample grid with a maximum of M points

        Parameters
        ----------

        X : array of shape [N, D]
            Input data (unscaled)
    
        radius : float
            minimum distance between points. This determines tile widths.

        prop : float in (0, 1), optional (default=1.0)
            Proportion of sample points to return (lowest density points are excluded)
        
        Returns
        -------

        GRID, array of shape [M, D]
            The sample grid with M < N points

        """

        a = np.min(X, axis=0)
        b = np.max(X, axis=0)
        #Tr = int(1 / radius)
        #T = [Tr + 1] * self.D

        T = [int((b[i] - a[i]) / radius) + 1 for i in range(self.D)] 

        self.tile = Tilecode(self.D, T, self.L, mem_max=self.mem_max , cores=self.cores, offset = self.offset) 
        
        N = X.shape[0]
        GRID, max_points =  self.tile.fit_samplegrid(X, prop)
        self.max_points = max_points

        return GRID


[docs]class TilecodeRegressor:

    """    
    Tile coding for function approximation (Supervised Learning).  
    Fits by averaging and/or Stochastic Gradient Descent.
    Supports multi-core fit and predict. Options for uniform, random or 'optimal' displacement vectors.
    Provides option for linear spline extrapolation / filling

    Parameters
    -----------
    
    D : integer
        Total number of input dimensions 
    
    T : list of integers, length D
        Number of tiles per dimension 
    
    L : integer
        Number of tiling 'layers'

    mem_max : double, (default=1)
        Proportion of tiles to store in memory: less than 1 means hashing is used.
    
    min_sample : integer, (default=50) 
        Minimum number of observations per tile

    offset : string, (default='uniform')
        Type of displacement vector, one of 'uniform', 'random' or 'optimal'

    lin_spline : boolean, (default=False)
        Use sparse linear spline model to extrapolate / fill empty tiles

    linT : integer, (default=6)
        Number of linear spline knots per dimension
    
    Attributes
    -----------

    tile : Cython Tilecode instance
    
    """

    def __init__(self, D, T, L, mem_max = 1, min_sample=1, offset='optimal', lin_spline=False, linT=7, cores=4):
        
        if D == 1 and offset == 'optimal':
            offset = 'uniform'

        self.tile = Tilecode(D, T, L, mem_max, min_sample, offset, lin_spline, linT, cores)

[docs]    def fit(self, X, Y, method='A', score=False, copy=True, a=0, b=0, pc_samp=1, eta=0.01, n_iters=1, scale=0):

        """    
        Estimate tilecode weights. 
        Supports `Averaging', Stochastic Gradient Descent (SGD) and Averaged SGD.

        Parameters
        -----------
        X : array, shape=(N, D) 
            Input data (unscaled)

        Y : array, shape=(N) 
            Output data (unscaled)

        method : string (default='A')
            Estimation method, one of 'A' (for Averaging), 'SGD' or 'ASGD'.

        score : boolean, (default=False)
            Calculate R-squared

        copy : boolean (default=False)
            Store X and Y

        a : array, optional shape=(D) 
            Percentile to use for minimum tiling range (if not provided set to 0)
        
        b : array, optional, shape=(D) 
            Percentile to use for maximum tiling range (if not provided set to 100)

        pc_samp : float, optional, (default=1)
            Proportion of sample to use when calculating percentile ranges

        eta : float (default=.01)
            SGD Learning rate

        n_iters : int (default=1)
            Number of passes over the data set in SGD

        scale : float (default=0)
            Learning rate scaling factor in SGD
        """

        if method == 'A':
            sgd = False
            asgd = False
        elif method == 'SGD':
            sgd = True
            asgd = False
        elif method == 'ASGD':
            sgd = True
            asgd = True
        
        if X.ndim == 1:
            X = X.reshape([X.shape[0], 1])

        self.tile.fit(X, Y, score=score, copy=copy, a=a, b=b, pc_samp=pc_samp, sgd=sgd, asgd=asgd, eta=eta, scale=scale, n_iters=n_iters)

[docs]    def check_memory(self, ):
        
        """
        Provides information on the current memory usage of the tilecoding scheme.
        If memory usage is an issue call this function after fitting and then consider rebuilding the scheme with a lower `mem_max` parameter.
        """

        print 'Number of Layers: ' + str(self.tile.L)
        print 'Tiles per layer: ' + str(self.tile.SIZE)
        print 'Total tiles: ' + str(self.tile.L * self.tile.SIZE)
        print 'Weight array size after hashing: ' + str(self.tile.mem_max)
        temp = np.count_nonzero(self.tile.count) / self.tile.mem_max
        print 'Percentage of weight array active: ' + str(np.count_nonzero(self.tile.count) / self.tile.mem_max)
        mem_max = self.tile.mem_max / (self.tile.L*self.tile.SIZE)
        print '----------------------------------------------'
        print 'Estimated current memory usage (Mb): ' + str((self.tile.mem_max * 2 * 8)/(1024**2))
        print '----------------------------------------------'
        print 'Current hashing parameter (mem_max): ' + str(mem_max)
        print 'Minimum hashing parameter (mem_max): ' + str(temp*mem_max)

[docs]    def predict(self, X):
        
        """    
        Return tilecode predicted value 

        Parameters
        -----------
        X : array, shape=(N, D) or (D,)
            Input data

        Returns
        --------
    
        Y : array, shape=(N,)
            Predicted values
        """

        return self.tile.predict(X)

[docs]    def plot(self, xargs=['x'], showdata=True):

        """
        Plot the function on along one dimension, holding others fixed 

        Parameters
        -----------
        xargs : list, length = D
            List of variable default values, set plotting dimension to 'x'
            Not required if D = 1

        showdata : boolean, (default=False)
            Scatter training points
        """

        self.tile.plot(xargs=xargs, showdata=showdata)
        pylab.show()

[docs]class TilecodeDensity:

    """    
    Tile coding approximation of the pdf of X  
    Fits by averaging. Supports multi-core fit and predict.
    Options for uniform, random or 'optimal' displacement vectors.

    Parameters
    -----------
    
    D : integer
        Total number of input dimensions 
    
    T : list of integers, length D
        Number of tiles per dimension 
    
    L : integer
        Number of tiling 'layers'

    mem_max : double, (default=1)
        Proportion of tiles to store in memory: less than 1 means hashing is used.
    
    min_sample : integer, (default=50) 
        Minimum number of observations per tile

    offset : string, (default='uniform')
        Type of displacement vector, one of 'uniform', 'random' or 'optimal'

    Attributes
    -----------

    tile : Tilecode instance
    
    Examples
    --------

    See also
    --------

    Notes
    -----

    """

    def __init__(self, D, T, L, mem_max = 1, offset='optimal', cores=1):

        if D == 1 and offset == 'optimal':
            offset = 'uniform'
        
        self.tile = Tilecode(D, T, L, mem_max=mem_max, min_sample=1, offset=offset, cores=cores)
    
    def fit(self, X, cdf=False):
        
        N = X.shape[0]

        if X.ndim == 1:
            X = X.reshape([X.shape[0], 1])
        
        self.tile.fit(X, np.zeros(N))
        d = np.array(self.tile.d)
        w = (d**-1) / np.array(self.tile.T) 
        adj = np.product(w)**-1  
        self.tile.countsuminv = (1 / N) * adj

    def predict(self, X):

        return self.tile.predict_prob(X)

[docs]    def plot(self, xargs=['x']):

        """
        Plot the pdf along one dimension, holding others fixed 

        Parameters
        -----------
        xargs : list, length = D
            List of variable default values, set plotting dimension to 'x'
            Not required if D = 1
        """

        self.tile.plot_prob(xargs=xargs)
        pylab.show()


[docs]class TilecodeNearestNeighbour:

    """
    Fast approximate nearest neighbour search using tile coding data structure  
    

    Parameters
    -----------

    D : int,
        Number of input dimensions

    L : int,
        Number of tilings or 'layers'

    mem_max : float, optional (default = 1)
        Tile array size, values less than 1 turn on hashing

    cores : int, optional (default=1)
        Number of CPU cores to use (fitting stage is parallelized)

    offset : {'optimal', 'random', 'uniform'}, (default='optimal') optional
        Type of displacement vector used

    Examples
    --------

    See also
    --------

    Notes
    -----

    This is an approximate method: it is possible that some points > than radius may be included 
    and some < than radius may be excluded.

    """
 
    def __init__(self, D, L, mem_max=1, cores=1, offset='optimal'):

        if D == 1 and offset == 'optimal':
            offset = 'uniform'
        
        self.D = D
        self.L = L
        self.mem_max = mem_max
        self.cores = cores
        self.offset= offset
        
[docs]    def fit(self, X, radius, prop=1):
        
        """
        Fit a tile coding data structure to X

        Parameters
        ----------

        X : array of shape [N, D]
            Input data (unscaled)
    
        radius : float 
            radius for nearest neighbor queries. Tile widths for each dimension
            of X are int((b[i] - a[i]) / radius) where b and a are the
            max and min values of X[:,i].
        """

        a = np.min(X, axis=0)
        b = np.max(X, axis=0)

        T = [int((b[i] - a[i]) / radius) + 1 for i in range(self.D)] 
        
        self.tile = Tilecode(self.D, T, self.L, mem_max=self.mem_max , cores=self.cores, offset = self.offset) 
        
        self.tile.fit(X, np.ones(X.shape[0]), unsupervised=True, copy=True)

[docs]    def predict(self, X, thresh = 1):

        """
        Obtain nearest neighbors (points within distance radius)

        Parameters
        ----------

        X : array of shape [N, D]
            Query points

        thresh : int, (default=1)
            Only include points if they are active in at least thresh layers (max is L)
            Higher thresh values will tend to exclude the points furthest from the query point

        Returns
        -------

        Y : list of arrays (length = N)
            Nearest neighbors for each query point
        """
        N = X.shape[0]
        Y = np.array(self.tile.nearest(X, N, thresh))
        #idx = Y > -0.1

        #return [Y[i, idx[i,:]] for i in range(N)]


[docs]class TilecodeQVIteration:

    """
    Solve a MDP with 1 policy variable and D state variables by Q-V Iteration
    
    Parameters
    -----------

    D : int, 
        Number of state variables
    
    T : list of integers, length D
        Number of tiles per dimension 

    L : int,
        Number of tilings or 'layers'

    radius : float,
        Radius for state space sample grid

    beta : float in (0, 1),
        Discount rate

    ms : int, optional (default = 1)
        Minimum samples per tile for the Q function
    
    mem_max : float, optional (default = 1)
        Tile array size, values less than 1 turns on hashing

    cores : int, optional (default=1)
        Number of CPU cores to use 
 
    ASGD : boolean, optional (default=True)
        Fit Q function by ASGD

    offset : {'optimal', 'random', 'uniform'}, (default='optimal')
        Type of displacement vector used

    linT : integer, optional (default=6)
        Number of linear spline knots per dimension

    Examples
    --------

    See also
    --------

    Notes
    -----


    """
 
 
    def __init__(self, D, T, L, radius, beta, ms=1, mem_max=1, cores=1,  ASGD=True, linT=6):

        self.Q_f = Tilecode(D + 1, T, L, mem_max, min_sample=ms, cores=cores)
        
        self.radius = radius
        
        Twv = int((1 / self.radius) / 2)
        T = [Twv for t in range(D)]
        L = int(130 / Twv)
        
        # Initialize policy function
        self.A_f = Tilecode(D, T, L, mem_max = 1, lin_spline=True, linT=linT, cores=cores)
        
        # Initialize value function
        self.V_f = Tilecode(D, T, L, mem_max = 1, lin_spline=True, linT=linT, cores=cores)
        
        self.first = True
        
        self.D = D
        self.beta = beta
        self.CORES = cores
        self.asgd = ASGD

[docs]    def resetQ(self, D, T, L, mem_max=1, ms=1):
    
        """
        Reset the Q function
        
        Parameters
        -----------

        D : int, 
            Number of state variables
        
        T : list of integers, length D
            Number of tiles per dimension 

        L : int,
            Number of tilings or 'layers'

        mem_max : float, optional (default = 1)
            Tile array size, values less than 1 turns on hashing
        
        ms : int, optional (default = 1)
            Minimum samples per tile for the Q function

        """

        self.Q_f = Tilecode(D + 1, T, L, mem_max, min_sample=ms, cores=self.CORES)
    
    
[docs]    def iterate(self, XA, X1, R, A_low, A_high, ITER=50, plot=False, plotdim=0, output=True, a=0, b=0, pc_samp=1, eta=0.8, maxT=60000, tilesg=False, sg_points=100, sg_prop=0.96,  sg_samp=1,  sgmem_max=0.4):

        """
        Perform QV iteration given a set of training data (N state-action and state transition samples) 
        to derive optimal value and policy functions
        
        Parameters
        -----------

        XA : array of shape [N, D + 1]
            State-action samples (i.e., actions in first column, then state variables) 
        
        X1 : array of shape [N, D]
            State transition samples (i.e., state at t+1)
        
        R : array of shape [N,]
            Payoff samples
        
        A_low : array of shape [N,], 
            Lower feasible bound for action A conditional on X

        A_high : array of shape [N,], 
            Upper feasible bound for action A conditional on X

        ITER : int, optional (default = 50)
            Number of iterations
        
        plot : boolean, optional (default = True)
            Whether to generate plots of the final value and policy function. 
        
        plotdim : int in [0, D],  optional (default = 0)
            Which state dimension to plot 
            (other dimensions are held fixed at their mean values). 
        
        a : array, optional, shape=(D) 
            Percentile to use for minimum tiling domain (if not provided set to 0)
        
        b : array, optional, shape=(D) 
            Percentile to use for maximum tiling domain (if not provided set to 100)

        pc_samp : float, optional, (default=1)
            Proportion of sample to use when calculating percentile ranges

        output : boolean, optional (default=True)
            Whether to print value function change updates each iteration
        
        eta : float (default=.01)
            ASGD / SGD learning rate
        
        maxT : int, default (default=60000)
            ASGD / SGD learning rate parameter

        tilesg : boolean, (default=False)
            If True then will use tilecoding to build state space sample grid
            else will use distance method. Tilecoding is preferred for large samples.
        
        sg_points : int, (default=100)
            If tilesg=False, then the number of points in the state space sample grid
        
        sg_prop : float, (default=0.96)
            If tilesg=True, then the proportion of points to include in the 
            state space sample grid (set less than 1 to exclude outliers)
        
        sg_samp : float, (default=0.5)  
            If tilesg=True, then the proportion of the sample points to use for the 
            state space sample grid 
        
        sgmem_max : float, (default = 0.4)
            If tilesg=True, then the mem_max (hashing) parameter of the sample grid
            tilecode scheme.  

        """
        tic = time()
        
        self.v_e = 0        # Value function error
        self.p_e = 0        # Policy function error
        
        T = XA.shape[0]
        
        self.value_error = np.zeros(ITER)
        
        if not(tilesg):
            grid, m = buildgrid(X1, sg_points, self.radius, scale=True, stopnum=X1.shape[0])
        else: 
            nn = int(X1.shape[0]*sg_samp)
            tic = time()
            tile = TilecodeSamplegrid(X1.shape[1], 25, mem_max=sgmem_max, cores=self.CORES)
            grid = tile.fit(X1[0:nn], self.radius, prop=sg_prop)
            toc = time()
            if output:
                print 'State grid points: ' + str(grid.shape[0]) + ', of maximum: ' + str(tile.max_points) + ', Time taken: ' + str(toc - tic)
            del tile
         
        points = grid.shape[0]

        if self.first:
            self.A_f.fit(grid, np.zeros(points))
            self.V_f.fit(grid, np.zeros(points))
            self.first = False
        
        minpol = np.min(A_low)
        maxpol = np.max(A_high)
        
        if ITER == 1:
            precompute = False
        else:
            precompute = True
        
        # ------------------
        #   Q-learning
        # ------------------

        ############ First iteration ================
        j = 0

        # Q values
        ticfit = time()
        Q = R + self.beta * self.V_f.predict(X1, store_XS=precompute)
        tocfit = time()
        if output:
            print 'Initial V prediction time: ' + str(tocfit - ticfit)
        
        # Fit Q function
        ticfit = time()
        self.Q_f.fit(XA, Q, pa=minpol, pb=maxpol , copy=True, unsupervised=precompute, sgd=self.asgd, asgd=self.asgd, eta=eta, n_iters=1, scale=1* (1 / min(T, maxT)), storeindex=(self.asgd and precompute), a=a, b=b, pc_samp=pc_samp)
        tocfit = time()
        if output:
            print 'Initial Q Fitting time: ' + str(tocfit - ticfit)

        # Optimise Q function
        self.value_error[0], W_opt, state = self.maximise(grid, A_low, A_high, output=output)
        ########### =================================

        ## Remaining iterations

        for j in range(1, ITER):
            # Q values
            Q = R + self.beta * self.V_f.fast_values()
            
            # Fit Q function
            self.Q_f.partial_fit(Q, 0)

            # Optimise Q function
            self.value_error[j], A_opt, state = self.maximise(grid, A_low, A_high, output=output)
        
        # Final policy function

        #A_opt_old = self.A_f.predict(grid)
        self.A_f.fit(state, A_opt, sgd=0, eta=0.1, n_iters=5, scale=0)
        A_opt_new = self.A_f.predict(grid)
        #self.pe = np.mean((W_opt_old - W_opt_new))/np.mean(W_opt_old)
        
        toc = time()

        if output:
            print 'Solve time: ' + str(toc - tic)
        
        if plot:
            xargs = [np.mean(X1[:, i]) for i in range(self.D)]
            xargs[plotdim] = 'x' 
            xargstemp = xargs
            self.A_f.plot(xargs, showdata=True)
            pylab.show()
            self.V_f.plot(xargstemp, showdata=True)
            pylab.show()

[docs]    def maximise(self, grid, A_low, A_high, output=True):

        """
        Maximises current Q-function for a subset of state space points and returns new value and policy functions

        Parameters
        -----------

        grid : array, shape=(N, D)
            State space grid

        A_low : array, shape=(N,)
            action lower bound

        A_high : array, shape=(N,)
            action upper bound
        
        Returns
        -----------

        ERROR: float
            Mean absolute deviation

        """
        
        tic = time()

        A_old = self.A_f.predict(grid)
        
        X =  np.vstack([A_old, grid.T]).T
        
        [W_opt, V, state, idx] = self.Q_f.opt(X, A_low, A_high)
        nidx = np.array([not(i) for i in idx])
        if output:
            print 'Number of optimisation points: ' + str(len(W_opt))

        V_old = self.V_f.predict(state)
        
        self.V_f.fit(state, V, sgd=0, eta=0.1, n_iters=5, scale=0)
        
        if np.count_nonzero(V_old) < V_old.shape[0]:
            self.ve = 1
        else:
            self.ve = np.mean(abs(V_old - V)/V_old)

        toc = time()
        
        if output:
            print 'Value change: ' + str(round(self.ve, 3)) + '\t---\tMaximisation time: ' + str(round(toc - tic, 4))

        return [self.ve, W_opt, state]