# Core C-level type declarations for the neural-network implementation:
# function-pointer typedefs for the forward/backward/update kernels, plain-C
# structs describing the network and its embeddings, and a C++ minibatch
# buffer class.  All methods are `nogil` so they can run without the GIL.
from libc.stdint cimport int16_t, int32_t, uint64_t
from preshed.maps cimport MapStruct
from libcpp.vector cimport vector
from libc.stdlib cimport malloc, calloc, free, realloc
from libc.string cimport memcpy, memset
from murmurhash.mrmr cimport real_hash64 as hash64
from .typedefs cimport len_t, idx_t, atom_t, weight_t
from .linalg cimport VecVec


include "compile_time_constants.pxi"


ctypedef vector[weight_t] vector_weight_t

# Weight-update kernel: applies `gradient` to `weights` (both length `nr`)
# using the hyper-parameters in `hp`.
ctypedef void (*do_update_t)(
    weight_t* weights,
    weight_t* gradient,
        len_t nr,
    const ConstantsC* hp,
) nogil


# Forward-pass kernel: fills the per-layer activation buffers in `fwd` from
# the flat weight array `W`; `shape` gives the width of each of the
# `nr_layer` layers, `nr_batch` the number of examples processed at once.
ctypedef void (*do_feed_fwd_t)(
    weight_t** fwd,
    const weight_t* W,
    const len_t* shape,
    int nr_layer,
    int nr_batch,
    const ConstantsC* hp
) nogil


# Backward-pass kernel: accumulates the gradient into `G`, reading the
# forward activations `fwd` and writing per-layer deltas into `bwd`.
ctypedef void (*do_feed_bwd_t)(
    weight_t* G,
    weight_t** bwd,
    const weight_t* W,
    const weight_t* const* fwd,
    const len_t* shape,
    int nr_layer,
    int nr_batch,
    const ConstantsC* hp
) nogil


# Alias this, so that it matches our naming scheme
ctypedef MapStruct MapC


# Grab-bag of scalar hyper-parameters, addressed by single-letter name.
# NOTE(review): members `f` and `v` are skipped — presumably deliberate
# (reserved / historically removed), but worth confirming before relying
# on the struct covering the whole alphabet.
cdef struct ConstantsC:
    weight_t a
    weight_t b
    weight_t c
    weight_t d
    weight_t e
    weight_t g
    weight_t h
    weight_t i
    weight_t j
    weight_t k
    weight_t l
    weight_t m
    weight_t n
    weight_t o
    weight_t p
    weight_t q
    weight_t r
    weight_t s
    weight_t t
    weight_t u
    weight_t w
    weight_t x
    weight_t y
    weight_t z


# Sparse embedding tables: one hash map of weights (and matching gradients)
# per embedding field, plus default vectors used when a key is absent.
cdef struct EmbedC:
    MapC** weights        # per-field hash map: feature key -> weight vector
    MapC** gradients      # per-field hash map: feature key -> gradient vector
    weight_t** defaults   # per-field default vector for unseen keys
    weight_t** d_defaults # gradient of the default vectors
    idx_t* offsets        # offset of each field's slice in the input layer
    len_t* lengths        # vector length for each field
    len_t nr              # number of embedding fields
    int nr_support


# The whole network: kernel function pointers, flat weight/gradient arrays,
# per-layer widths, optional embeddings, and the hyper-parameter constants.
cdef struct NeuralNetC:
    do_feed_fwd_t feed_fwd
    do_feed_bwd_t feed_bwd
    do_update_t update

    len_t* widths         # width of each layer; length nr_layer
    weight_t* weights     # all parameters, flattened; length nr_weight
    weight_t* gradient    # gradient buffer matching `weights`
    EmbedC* embed

    len_t nr_layer
    len_t nr_weight
    len_t nr_node

    ConstantsC hp


# Aligned-allocation primitives not exposed by libc.stdlib.
# NOTE(review): posix_memalign/valloc are POSIX-only — confirm no Windows
# build target needs these.
cdef extern from "stdlib.h":
    int posix_memalign(void **memptr, size_t alignment, size_t size) nogil
    void* valloc (size_t size) nogil


# One training/inference example: class-validity mask, per-class costs,
# atoms and extracted features on the input side, scores on the output side.
cdef struct ExampleC:
    int* is_valid         # length nr_class; non-zero = class may be predicted
    weight_t* costs       # length nr_class; 0 marks a gold class
    uint64_t* atoms       # length nr_atom
    FeatureC* features    # length nr_feat
    weight_t* scores      # length nr_class; filled by the forward pass

    int nr_class
    int nr_atom
    int nr_feat


# Fixed-capacity minibatch buffer.  Owns per-layer forward/backward
# activation arrays sized batch_size * width, plus per-example feature
# arrays, costs, validity masks and input-signature hashes used to
# deduplicate identical inputs within a batch.
cdef cppclass MinibatchC:
    weight_t** _fwd       # nr_layer arrays of batch_size * widths[layer]
    weight_t** _bwd       # same shape as _fwd; backward deltas
    FeatureC** _feats     # batch_size owned feature arrays (see push_back)
    len_t* _nr_feat       # length of each _feats[i]
    weight_t* _costs      # batch_size * nr_out(), row-major per example
    int* _is_valid        # batch_size * nr_out(), row-major per example
    uint64_t* signatures  # per-slot input hash; 0 = no signature

    len_t* widths         # copy of the layer widths; length nr_layer
    int i                 # number of slots currently filled
    int nr_layer
    int batch_size

    # Allocate all buffers zero-initialised and copy `widths`.
    # NOTE(review): calloc results are not checked for NULL — allocation
    # failure would surface as a crash on first use.
    __init__(len_t* widths, int nr_layer, int batch_size) nogil:
        this.i = 0
        this.nr_layer = nr_layer
        this.batch_size = batch_size
        this.widths = calloc(nr_layer, sizeof(len_t))
        this._fwd = calloc(nr_layer, sizeof(weight_t*))
        this._bwd = calloc(nr_layer, sizeof(weight_t*))
        for i in range(nr_layer):
            this.widths[i] = widths[i]
            this._fwd[i] = calloc(this.widths[i] * batch_size, sizeof(weight_t))
            this._bwd[i] = calloc(this.widths[i] * batch_size, sizeof(weight_t))
        this._feats = calloc(batch_size, sizeof(void*))
        this._nr_feat = calloc(batch_size, sizeof(len_t))
        this._is_valid = calloc(batch_size * widths[nr_layer-1], sizeof(int))
        this._costs = calloc(batch_size * widths[nr_layer-1], sizeof(weight_t))
        this.signatures = calloc(batch_size, sizeof(uint64_t))

    # Release everything __init__ allocated.  Only the first `this.i`
    # feature arrays exist (they are allocated lazily in push_back).
    __dealloc__() nogil:
        free(this.widths)
        for i in range(this.nr_layer):
            free(this._fwd[i])
            free(this._bwd[i])
        for i in range(this.i):
            free(this._feats[i])
        free(this._fwd)
        free(this._bwd)
        free(this._feats)
        free(this._nr_feat)
        free(this._is_valid)
        free(this._costs)
        free(this.signatures)

    # Zero all activation/cost/validity buffers, free the per-example
    # feature arrays, and mark the batch empty (i = 0).
    void reset() nogil:
        for i in range(this.nr_layer):
            memset(this._fwd[i],
                0, sizeof(this._fwd[i][0]) * this.batch_size * this.widths[i])
            memset(this._bwd[i],
                0, sizeof(this._bwd[i][0]) * this.batch_size * this.widths[i])
        memset(this._nr_feat, 0, sizeof(this._nr_feat[0]) * this.batch_size)
        memset(this.signatures, 0, sizeof(this.signatures[0]) * this.batch_size)
        memset(this._costs,
            0, sizeof(this._costs[0]) * this.nr_out() * this.batch_size)
        memset(this._is_valid,
            0, sizeof(this._is_valid[0]) * this.nr_out() * this.batch_size)
        for i in range(this.i):
            free(this._feats[i])
            this._feats[i] = NULL
        this.i = 0

    # Width of the input layer.
    int nr_in() nogil:
        return this.widths[0]

    # Width of the output layer (number of classes).
    int nr_out() nogil:
        return this.widths[this.nr_layer - 1]

    # Append an example to the batch.  Returns non-zero when the batch is
    # full (caller should then process it).  If `key` matches an existing
    # slot's signature, the example is treated as a duplicate input and its
    # costs are merged into that slot instead of taking a new one.
    # When the batch is already full on entry, reset() discards the current
    # contents first — the caller is expected to have processed the batch
    # when the previous push_back returned non-zero.
    int push_back(const FeatureC* feats, int nr_feat,
            const weight_t* costs, const int* is_valid, uint64_t key) nogil:
        # Hash the features, to see if the batch has a matching input.
        # If it does, just update the gradient for it.
        if key != 0:
            for i in range(this.i):
                if this.signatures[i] == key:
                    VecVec.add_i(this.costs(i),
                        costs, 1.0, this.nr_out())
                    return 0
        if this.i >= this.batch_size:
            this.reset()
            this.i = 0 # This is done in reset() --- but make it obvious

        this.signatures[this.i] = key
        this._nr_feat[this.i] = nr_feat
        # Take a private copy of the caller's feature array; freed by
        # reset()/__dealloc__.
        this._feats[this.i] = calloc(nr_feat, sizeof(FeatureC))
        memcpy(this._feats[this.i],
            feats, nr_feat * sizeof(this._feats[this.i][0]))

        memcpy(this.costs(this.i),
            costs, this.nr_out() * sizeof(costs[0]))
        if is_valid is not NULL:
            memcpy(this.is_valid(this.i),
                is_valid, this.nr_out() * sizeof(is_valid[0]))
        else:
            # No mask supplied: every class is valid.
            for i in range(this.nr_out()):
                this.is_valid(this.i)[i] = 1
        this.i += 1
        return this.i >= this.batch_size

    # Feature array for example i (borrowed pointer; owned by this batch).
    FeatureC* features(int i) nogil:
        return this._feats[i]

    # Number of features for example i.
    int nr_feat(int i) nogil:
        return this._nr_feat[i]

    # Forward activations of layer i for example j.
    weight_t* fwd(int i, int j) nogil:
        return this._fwd[i] + (j * this.widths[i])

    # Backward deltas of layer i for example j.
    weight_t* bwd(int i, int j) nogil:
        return this._bwd[i] + (j * this.widths[i])

    # Output-layer activations (class scores) for example i.
    weight_t* scores(int i) nogil:
        return this.fwd(this.nr_layer-1, i)

    # Output-layer deltas (losses) for example i.
    weight_t* losses(int i) nogil:
        return this.bwd(this.nr_layer-1, i)

    # Per-class costs for example i.
    weight_t* costs(int i) nogil:
        return this._costs + (i * this.nr_out())

    # Per-class validity mask for example i.
    int* is_valid(int i) nogil:
        return this._is_valid + (i * this.nr_out())

    # Predicted class: argmax over scores restricted to valid classes.
    int guess(int i) nogil:
        return VecVec.arg_max_if_true(
            this.scores(i), this.is_valid(i), this.nr_out())

    # Best gold class: argmax over scores restricted to zero-cost classes.
    int best(int i) nogil:
        return VecVec.arg_max_if_zero(
            this.scores(i), this.costs(i), this.nr_out())


# One cell of a sparse weight vector; `packed` to minimise memory, since
# large numbers of these are stored.
cdef packed struct SparseArrayC:
    int32_t key
    weight_t val


# One extracted feature: slot index, hashed key, and its value.
cdef struct FeatureC:
    int i
    uint64_t key
    weight_t value


# Per-feature training state for an averaged / adaptive sparse update:
# current weights, first and second moments, running averages, last-update
# timestamps and accumulated penalties (parallel sparse arrays).
cdef struct SparseAverageC:
    SparseArrayC* curr
    SparseArrayC* mom1
    SparseArrayC* mom2
    SparseArrayC* avgs
    SparseArrayC* times
    SparseArrayC* penalties
    weight_t penalty


# A feature-extraction template: which atom indices to combine, and a
# scratch buffer for the extracted atoms.  MAX_TEMPLATE_LEN comes from
# compile_time_constants.pxi.
cdef struct TemplateC:
    int[MAX_TEMPLATE_LEN] indices
    int length
    atom_t[MAX_TEMPLATE_LEN] atoms