Skip to content

Commit

Permalink
new traindata append to file. preparation for streamer
Browse files Browse the repository at this point in the history
  • Loading branch information
jkiesele committed Mar 9, 2021
1 parent d847c7d commit 1b29c1f
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 10 deletions.
17 changes: 17 additions & 0 deletions compiled/interface/trainData.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ class typeContainer{
void push_back(simpleArrayBase& a);
void move_back(simpleArrayBase& a);

// equality comparison, implemented out of line
bool operator==(const typeContainer& rhs)const;
// inequality is the plain negation of operator==
bool operator!=(const typeContainer& rhs)const{
    const bool same = (*this==rhs);
    return !same;
}


simpleArrayBase& at(size_t idx);
const simpleArrayBase& at(size_t idx)const;

Expand Down Expand Up @@ -83,6 +89,11 @@ class trainData{
public:



// equality comparison, implemented out of line
bool operator==(const trainData& rhs)const;
// inequality is the plain negation of operator==
bool operator!=(const trainData& rhs)const{
    const bool same = (*this==rhs);
    return !same;
}
//takes ownership
//these need to be separated by input type because python does not allow for overload
//but then the py interface can be made generic to accept different types
Expand Down Expand Up @@ -204,6 +215,9 @@ class trainData{
// read-only access to the stored weight shapes (weight_shapes_)
const std::vector<std::vector<int> > & weightShapes()const{
    return weight_shapes_;
}

//writes this object to 'filename'; the implementation opens the file in "wb" mode
void writeToFile(std::string filename)const;
//same as writeToFile, but the implementation opens the file in "ab" (append) mode
void addToFile(std::string filename)const;

//writes this object to an already opened file; called by writeToFile and addToFile
void addToFileP(FILE *& f)const;

void readFromFile(std::string filename){
priv_readFromFile(filename,false);
Expand Down Expand Up @@ -276,6 +290,9 @@ class trainData{

void priv_readFromFile(std::string filename, bool memcp);

//reads shapes and arrays from the open file 'f' into a new trainData and returns it;
//'filename' is only handed on to checkFile
trainData priv_readFromFileP(FILE *& f, const std::string& filename)const;
//same read sequence as priv_readFromFileP, but fills the members of this object
void priv_readSelfFromFileP(FILE *& f, const std::string& filename);

void checkFile(FILE *& f, const std::string& filename="")const;


Expand Down
5 changes: 5 additions & 0 deletions compiled/src/c_trainData.C
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ using namespace djc;
BOOST_PYTHON_MODULE(c_trainData) {
Py_Initialize();
np::initialize();
using namespace p;
p::class_<trainData >("trainData")

.def(self==self)
.def(self!=self)

//explicit overloading
.def<int (trainData::*)(simpleArray_float32&)>("storeFeatureArray", &trainData::storeFeatureArray)
Expand All @@ -29,6 +32,7 @@ BOOST_PYTHON_MODULE(c_trainData) {
.def<int (trainData::*)(simpleArray_float32&)>("storeWeightArray", &trainData::storeWeightArray)
.def<int (trainData::*)(simpleArray_int32&)>("storeWeightArray", &trainData::storeWeightArray)


// .def("featureList", &trainData::featureList)
// .def("truthList", &trainData::truthList)
// .def("weightList", &trainData::weightList)
Expand All @@ -46,6 +50,7 @@ BOOST_PYTHON_MODULE(c_trainData) {
.def("readFromFile", &trainData::readFromFile)
.def("readFromFileBuffered", &trainData::readFromFileBuffered)
.def("writeToFile", &trainData::writeToFile)
.def("addToFile", &trainData::addToFile)


.def("copy", &trainData::copy)
Expand Down
89 changes: 80 additions & 9 deletions compiled/src/trainData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,25 @@ void typeContainer::move_back(simpleArrayBase& a){
sorting_.push_back({isint,iarrs_.size()-1});
}
}
// Two containers compare equal when size(), the number of farrs_ entries and
// sorting_ agree, and every farrs_ and iarrs_ entry matches its counterpart.
bool typeContainer::operator==(const typeContainer& rhs)const{
    // cheap bookkeeping checks first, evaluated left to right
    const bool sameLayout = size() == rhs.size()
            && farrs_.size() == rhs.farrs_.size()
            && !(sorting_ != rhs.sorting_);
    if(!sameLayout)
        return false;

    // element-wise comparison of the farrs_ entries
    size_t idx = 0;
    while(idx < farrs_.size()){
        if(farrs_.at(idx) != rhs.farrs_.at(idx))
            return false;
        ++idx;
    }
    // element-wise comparison of the iarrs_ entries
    for(idx = 0; idx < iarrs_.size(); ++idx){
        if(iarrs_.at(idx) != rhs.iarrs_.at(idx))
            return false;
    }
    return true;
}
simpleArrayBase& typeContainer::at(size_t idx){
if(idx>=sorting_.size())
throw std::out_of_range("typeContainer::at: requested "+std::to_string(idx)+" of "+std::to_string(sorting_.size()));
Expand Down Expand Up @@ -119,6 +138,22 @@ void typeContainer::readFromFile_priv(FILE *& ifile, bool justmetadata){

////////////////// trainData //////////////////////

// Two trainData objects are equal when all three array containers and all
// three shape descriptions compare equal. The checks run in the order
// arrays (feature, truth, weight) then shapes (feature, truth, weight) and
// stop at the first difference.
bool trainData::operator==(const trainData& rhs)const{
    const bool differs =
               feature_arrays_ != rhs.feature_arrays_
            || truth_arrays_   != rhs.truth_arrays_
            || weight_arrays_  != rhs.weight_arrays_
            || feature_shapes_ != rhs.feature_shapes_
            || truth_shapes_   != rhs.truth_shapes_
            || weight_shapes_  != rhs.weight_shapes_;
    return !differs;
}


int trainData::storeFeatureArray(simpleArrayBase & a){
Expand Down Expand Up @@ -267,6 +302,19 @@ bool trainData::validSlice(size_t splitindex_begin, size_t splitindex_end)const{
// Writes this trainData to 'filename'. The file is opened in "wb" mode, so
// existing content is replaced.
// Throws std::runtime_error if the file cannot be opened.
void trainData::writeToFile(std::string filename)const{

    FILE *ofile = fopen(filename.c_str(), "wb");
    // fopen returns a null pointer on failure; writing to or closing it is undefined
    if(!ofile)
        throw std::runtime_error("trainData::writeToFile: could not open file "+filename+" for writing");

    try{
        addToFileP(ofile);
    }
    catch(...){
        // do not leak the handle if writing fails
        fclose(ofile);
        throw;
    }
    fclose(ofile);

}

// Appends this trainData to 'filename'. The file is opened in "ab" mode, so
// existing content is kept and the new data is written after it.
// Throws std::runtime_error if the file cannot be opened.
void trainData::addToFile(std::string filename)const{

    FILE *ofile = fopen(filename.c_str(), "ab");
    // fopen returns a null pointer on failure; writing to or closing it is undefined
    if(!ofile)
        throw std::runtime_error("trainData::addToFile: could not open file "+filename+" for appending");

    try{
        addToFileP(ofile);
    }
    catch(...){
        // do not leak the handle if writing fails
        fclose(ofile);
        throw;
    }
    fclose(ofile);
}

void trainData::addToFileP(FILE *& ofile)const{
float version = DJCDATAVERSION;
io::writeToFile(&version, ofile);

Expand All @@ -279,15 +327,13 @@ void trainData::writeToFile(std::string filename)const{
feature_arrays_.writeToFile(ofile);
truth_arrays_.writeToFile(ofile);
weight_arrays_.writeToFile(ofile);
fclose(ofile);

}

void trainData::priv_readFromFile(std::string filename, bool memcp){
clear();
FILE *ifile = fopen(filename.data(), "rb");
char *buf = 0;
if(memcp){
if(false && memcp){
FILE *diskfile = ifile;
//check if exists before trying to memcp.
checkFile(ifile, filename); //not set at start but won't be used
Expand All @@ -307,6 +353,37 @@ void trainData::priv_readFromFile(std::string filename, bool memcp){
ifile = fmemopen(buf,fsize,"r");
}

priv_readSelfFromFileP(ifile,filename);
//check for eof and add until done. the append step can be heavily optimized! FIXME
//read one more byte
int ch = getc(ifile);
while(! feof(ifile)){
fseek(ifile,-1,SEEK_CUR);
append(priv_readFromFileP(ifile,filename));
ch = getc(ifile);
}

fclose(ifile);
if(buf){
delete buf;
}
}

// Reads one trainData from the already opened stream 'ifile' and returns it
// by value. 'filename' is only forwarded to checkFile.
// The read order is fixed: checkFile, the three shape descriptions
// (feature, truth, weight), then the three array containers in the same order.
trainData trainData::priv_readFromFileP(FILE *& ifile, const std::string& filename)const{
    trainData record;

    //include file version check
    record.checkFile(ifile, filename);

    // shape descriptions
    record.readNested(record.feature_shapes_, ifile);
    record.readNested(record.truth_shapes_, ifile);
    record.readNested(record.weight_shapes_, ifile);

    // array payload
    record.feature_arrays_.readFromFile(ifile);
    record.truth_arrays_.readFromFile(ifile);
    record.weight_arrays_.readFromFile(ifile);

    return record;
}

void trainData::priv_readSelfFromFileP(FILE *& ifile, const std::string& filename){
checkFile(ifile, filename);
readNested(feature_shapes_, ifile);
readNested(truth_shapes_, ifile);
Expand All @@ -315,12 +392,6 @@ void trainData::priv_readFromFile(std::string filename, bool memcp){
feature_arrays_ .readFromFile(ifile);
truth_arrays_.readFromFile(ifile);
weight_arrays_.readFromFile(ifile);

fclose(ifile);
if(buf){
delete buf;
}

}

void trainData::readMetaDataFromFile(const std::string& filename){
Expand Down
32 changes: 31 additions & 1 deletion testing/unit/TestTrainData.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,37 @@ def test_store(self):

# Runs sub_test_store with its argument set to True (presumably this enables
# the read/write round trip -- confirm against sub_test_store).
# NOTE(review): the sub_test_store call appears twice in this view; this looks
# like the old and new side of a whitespace-only diff line -- confirm.
def test_readWrite(self):
print('TestTrainData: readWrite')
self.sub_test_store(True)
self.sub_test_store(True)

def nestedEqual(self,l,l2):
    """Return True if both sequences have the same length and all paired entries are equal.

    Entries are compared pairwise with ``np.all(a==b)``. The length check is
    needed because ``zip`` stops at the shorter sequence, so without it a
    sequence would compare equal to any longer one that starts with the same
    entries.
    """
    if len(l) != len(l2):
        return False
    for a,b in zip(l,l2):
        if not np.all(a==b):
            return False
    return True

def test_AddToFile(self):
    """Write a TrainData to a file, append it once more, and read the file back.

    The object read back must equal a reference TrainData that holds the
    same arrays appended to themselves in memory.
    """
    print('TestTrainData: AddToFile')

    fname = "testfile.tdjctd"

    td = TrainData()
    x,y,w = self.createSimpleArray('int32'), self.createSimpleArray('float32'), self.createSimpleArray('int32')
    xo,yo,wo = x.copy(),y.copy(),w.copy()
    x2,y2,_ = self.createSimpleArray('float32'), self.createSimpleArray('float32'), self.createSimpleArray('int32')
    x2o,y2o = x2.copy(),y2.copy()
    td._store([x,x2], [y,y2], [w])

    try:
        td.writeToFile(fname)
        td.addToFile(fname)

        # reference: the same content appended in memory
        td2 = TrainData()
        td2._store([xo,x2o], [yo,y2o], [wo])
        td2.append(td)

        td.readFromFile(fname)
    finally:
        # always remove the temporary file, also when a step above raises
        if os.path.exists(fname):
            os.remove(fname)

    self.assertEqual(td,td2)

def test_split(self):
print('TestTrainData: split')
Expand Down

0 comments on commit 1b29c1f

Please sign in to comment.