From 0830a3d6df1aa7d4a579c37f61b71397f6d338b3 Mon Sep 17 00:00:00 2001 From: johncreed Date: Sat, 3 Jul 2021 19:41:22 +0800 Subject: [PATCH 1/3] Fix SSE inner_product calculation. --- mf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mf.cpp b/mf.cpp index b90aa96..d82a371 100644 --- a/mf.cpp +++ b/mf.cpp @@ -598,7 +598,7 @@ mf_float Utility::inner_product(mf_float *p, mf_float *q, mf_int k) XMM = _mm_add_ps(XMM, _mm_mul_ps( _mm_load_ps(p+d), _mm_load_ps(q+d))); __m128 XMMtmp = _mm_add_ps(XMM, _mm_movehl_ps(XMM, XMM)); - XMM = _mm_add_ps(XMM, _mm_shuffle_ps(XMMtmp, XMMtmp, 1)); + XMM = _mm_add_ps(XMMtmp, _mm_shuffle_ps(XMMtmp, XMMtmp, 1)); mf_float product; _mm_store_ss(&product, XMM); return product; From 93f093d8774290f5528143922820acbcc3385405 Mon Sep 17 00:00:00 2001 From: jyhsia Date: Sat, 3 Jul 2021 21:18:00 +0800 Subject: [PATCH 2/3] 1. Add calc_loss function 2. Print initial loss and reg to cerr. Check the init model is the same with matlab code. 3. Print time. 4. Disable scale 5. Save initial model --- mf.cpp | 123 ++++++++++++++++++++++++++++++++++++++++++++++++--------- mf.h | 1 + 2 files changed, 105 insertions(+), 19 deletions(-) diff --git a/mf.cpp b/mf.cpp index b90aa96..907bca5 100644 --- a/mf.cpp +++ b/mf.cpp @@ -598,7 +598,7 @@ mf_float Utility::inner_product(mf_float *p, mf_float *q, mf_int k) XMM = _mm_add_ps(XMM, _mm_mul_ps( _mm_load_ps(p+d), _mm_load_ps(q+d))); __m128 XMMtmp = _mm_add_ps(XMM, _mm_movehl_ps(XMM, XMM)); - XMM = _mm_add_ps(XMM, _mm_shuffle_ps(XMMtmp, XMMtmp, 1)); + XMM = _mm_add_ps(XMMtmp, _mm_shuffle_ps(XMMtmp, XMMtmp, 1)); mf_float product; _mm_store_ss(&product, XMM); return product; @@ -976,8 +976,7 @@ void Utility::grid_shuffle_scale_problem_on_disk( mf_float* Utility::malloc_aligned_float(mf_long size) { // Check if conversion from mf_long to size_t causes overflow. 
- if (size >= 0 && sizeof(unsigned long) >= sizeof(mf_long) && - (unsigned long)size > numeric_limits::max() / sizeof(mf_float) + 1) + if (size > numeric_limits::max() / sizeof(mf_float) + 1) throw bad_alloc(); // [REVIEW] I hope one day we can use C11 aligned_alloc to replace // platform-depedent functions below. Both of Windows and OSX currently @@ -1028,7 +1027,7 @@ mf_model* Utility::init_model(mf_int fun, model->Q = nullptr; mf_float scale = (mf_float)sqrt(1.0/k_real); - default_random_engine generator; + default_random_engine generator(0); // (exp) fix random seed to zero. uniform_real_distribution distribution(0.0, 1.0); try @@ -1115,11 +1114,11 @@ mf_model* Utility::init_model(mf_int m, mf_int n, mf_int k) vector Utility::gen_random_map(mf_int size) { - default_random_engine generator; + srand(0); vector map(size, 0); for(mf_int i = 0; i < size; ++i) map[i] = i; - shuffle(map.begin(), map.end(), generator); + random_shuffle(map.begin(), map.end()); return map; } @@ -2893,6 +2892,27 @@ shared_ptr SolverFactory::get_solver( return solver; } +mf_double calc_loss(vector &blocks, mf_model &model){ + mf_node *N; + mf_float *p; + mf_float *q; + mf_double loss = 0; + for(mf_int i = 0; i < (mf_long)blocks.size(); ++i){ + BlockBase* block = blocks[i]; + block->reload(); + while(block->move_next()) + { + N = block->get_current(); + p = model.P+(mf_long)N->u*model.k; + q = model.Q+(mf_long)N->v*model.k; + mf_double z = Utility::inner_product(p,q,model.k); + z = N->r-z; + loss += z*z; + } + } + return loss; +} + void fpsg_core( Utility &util, Scheduler &sched, @@ -2950,6 +2970,8 @@ void fpsg_core( } cout.width(13); cout << "obj"; + cout.width(10); + cout << "time"; cout << "\n"; } @@ -2959,6 +2981,21 @@ void fpsg_core( vector> solvers(param.nr_threads); vector threads; threads.reserve(param.nr_threads); + + if(true){ // (exp) toggle init model check + mf_double init_reg2 = util.calc_reg2(*model, param.lambda_p2, + param.lambda_q2, omega_p, omega_q); + cerr << "initial 
reg: "; + cerr.width(15); + cerr << fixed << setprecision(4) << 0.5*init_reg2; + mf_double init_loss = calc_loss(block_ptrs, *model); + cerr.width(15); + cerr << "initial loss: "; + cerr.width(15); + cerr << fixed << setprecision(4) << 0.5*init_loss << endl; + } + + double st = omp_get_wtime(); for(mf_int i = 0; i < param.nr_threads; ++i) { solvers[i] = SolverFactory::get_solver(sched, block_ptrs, @@ -2978,7 +3015,7 @@ void fpsg_core( param.lambda_q1, omega_p, omega_q); mf_double reg2 = util.calc_reg2(*model, param.lambda_p2, param.lambda_q2, omega_p, omega_q); - mf_double tr_loss = sched.get_loss(); + mf_double tr_loss = calc_loss(block_ptrs, *model); // (exp) mf_double tr_loss = sched.get_loss(); mf_double tr_error = sched.get_error()/tr->nnz; switch(param.fun) @@ -3025,7 +3062,9 @@ void fpsg_core( cout << fixed << setprecision(4) << va_error; } cout.width(13); - cout << fixed << setprecision(4) << scientific << reg+tr_loss; + cout << fixed << setprecision(4) << scientific << 0.5*(reg+tr_loss); + cout.width(10); + cout << fixed << setprecision(2) << omp_get_wtime() - st; cout << "\n" << flush; } @@ -3081,10 +3120,10 @@ try vector blocks(param.nr_bins*param.nr_bins); vector block_ptrs(param.nr_bins*param.nr_bins); vector ptrs; - vector p_map; - vector q_map; - vector inv_p_map; - vector inv_q_map; + vector p_map, p_map_cp; + vector q_map, q_map_cp; + vector inv_p_map, inv_p_map_cp; + vector inv_q_map, inv_q_map_cp; vector omega_p; vector omega_q; mf_float avg = 0; @@ -3109,12 +3148,12 @@ try if(param.fun == P_L2_MFR || param.fun == P_L1_MFR || param.fun == P_KL_MFR) - scale = max((mf_float)1e-4, std_dev); + scale = 1.0; // (exp) disable scale scale = max((mf_float)1e-4, std_dev); - p_map = Utility::gen_random_map(tr->m); - q_map = Utility::gen_random_map(tr->n); - inv_p_map = Utility::gen_inv_map(p_map); - inv_q_map = Utility::gen_inv_map(q_map); + p_map = p_map_cp = Utility::gen_random_map(tr->m); + q_map = q_map_cp = Utility::gen_random_map(tr->n); + inv_p_map = 
inv_p_map_cp = Utility::gen_inv_map(p_map); + inv_q_map = inv_q_map_cp = Utility::gen_inv_map(q_map); omega_p = vector(tr->m, 0); omega_q = vector(tr->n, 0); @@ -3128,6 +3167,12 @@ try tr->m, tr->n, param.k, avg/scale, omega_p, omega_q), [] (mf_model *ptr) { mf_destroy_model(&ptr); }); + if(true){ // (exp) toggle save model + Utility::shuffle_model(*model, inv_p_map_cp, inv_q_map_cp); + mf_save_initial_model(model.get()); + Utility::shuffle_model(*model, p_map_cp, q_map_cp); // (exp) shuffle model base + } + for(mf_int i = 0; i < (mf_long)blocks.size(); ++i) block_ptrs[i] = &blocks[i]; @@ -4071,10 +4116,10 @@ CrossValidatorBase::CrossValidatorBase(mf_parameter param_, mf_int nr_folds_) mf_double CrossValidatorBase::do_cross_validation() { vector cv_blocks; - default_random_engine generator; + srand(0); for(mf_int block = 0; block < nr_bins*nr_bins; ++block) cv_blocks.push_back(block); - shuffle(cv_blocks.begin(), cv_blocks.end(), generator); + random_shuffle(cv_blocks.begin(), cv_blocks.end()); if(!quiet) { @@ -4352,6 +4397,46 @@ mf_int mf_save_model(mf_model const *model, char const *path) return 0; } +// (exp) save init model for matlab libmf code. 
+mf_int mf_save_initial_model(mf_model const *model) +{ + ofstream fp("P.model"); + ofstream fq("Q.model"); + if(!fp.is_open()) + return 1; + + if(!fq.is_open()) + return 1; + + auto write = [&] (ofstream &f, mf_float *ptr, mf_int size, char prefix) + { + for(mf_int i = 1; i < size; ++i) // (exp) the data index is started from index 1 + { + mf_float *ptr1 = ptr + (mf_long)i*model->k; + if(isnan(ptr1[0])) + { + for(mf_int d = 0; d < model->k; ++d) + f << 0 << " "; + } + else + { + for(mf_int d = 0; d < model->k; ++d) + f << ptr1[d] << " "; + } + f << endl; + } + + }; + + write(fp, model->P, model->m, 'p'); + write(fq, model->Q, model->n, 'q'); + + fp.close(); + fq.close(); + + return 0; +} + mf_model* mf_load_model(char const *path) { ifstream f(path); diff --git a/mf.h b/mf.h index 83bf892..e06d360 100644 --- a/mf.h +++ b/mf.h @@ -72,6 +72,7 @@ struct mf_model mf_problem read_problem(std::string path); mf_int mf_save_model(struct mf_model const *model, char const *path); +mf_int mf_save_initial_model(struct mf_model const *model); struct mf_model* mf_load_model(char const *path); From 47e723c330567ac6a0c1a63bfd740c851d610245 Mon Sep 17 00:00:00 2001 From: jyhsia Date: Thu, 8 Jul 2021 13:36:35 +0800 Subject: [PATCH 3/3] Fix output model trailing whitespace. --- mf.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mf.cpp b/mf.cpp index 907bca5..1a8c884 100644 --- a/mf.cpp +++ b/mf.cpp @@ -4415,13 +4415,16 @@ mf_int mf_save_initial_model(mf_model const *model) mf_float *ptr1 = ptr + (mf_long)i*model->k; + // Space goes before every entry except the first, so rows never end in whitespace. if(isnan(ptr1[0])) { - for(mf_int d = 0; d < model->k; ++d) - f << 0 << " "; + f << 0; + for(mf_int d = 1; d < model->k; ++d) + f << " " << 0; } else { - for(mf_int d = 0; d < model->k; ++d) - f << ptr1[d] << " "; + f << ptr1[0]; + for(mf_int d = 1; d < model->k; ++d) + f << " " << ptr1[d]; } f << endl; }