Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop/two stage #12

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 121 additions & 2 deletions ICE/ICE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,36 @@ void ICE::LoadDict(unordered_map<string, unordered_map<string, double>>& graph)
vvnet.LoadDict(graph);
}

void ICE::SaveWeights(string model_name){
// Loads weights from `filename` by delegating to vvnet.LoadWeights, passing
// this model's w_context matrix as the second argument.
// NOTE(review): presumably the file contents end up in w_context — confirm
// against the vvnet implementation, which is not visible here.
void ICE::LoadWeights(string filename)
{
    vvnet.LoadWeights(filename, w_context);
}

// Writes the vertex embeddings (w_vertex) to the text file `model_name`.
//
// For every key in vvnet.keys whose vertex has branch > 0, one line is
// written: the key followed by `dim` space-separated values from the
// matching row of w_vertex. Keys that fail the branch test are skipped.
// No header line (vertex count / dimension) is written.
// If the file cannot be opened, a message is printed to stdout and the
// function returns without writing anything.
void ICE::SaveVertexWeights(string model_name)
{
    cout << "Save Model:" << endl;

    ofstream out(model_name);
    if (!out)
    {
        cout << "\tfail to open file" << endl;
        return;
    }

    for (auto key : vvnet.keys)
    {
        // Resolve the key to its vertex id once and reuse it below.
        const auto vid = vvnet.kmap[key];
        if (!(vvnet.vertex[vid].branch > 0))
            continue;

        out << key;
        for (int d = 0; d < dim; ++d)
            out << " " << w_vertex[vid][d];
        out << endl;
    }

    cout << "\tSave to <" << model_name << ">" << endl;
}

void ICE::SaveContextWeights(string model_name){

cout << "Save Model:" << endl;
ofstream model(model_name);
Expand All @@ -27,6 +56,37 @@ void ICE::SaveWeights(string model_name){
//model << vvnet.MAX_vid << " " << dim << endl;
for (auto k: vvnet.keys)
{
if (vvnet.vertex[vvnet.kmap[k]].branch > 0)
{
model << k;
for (int d=0; d<dim; ++d)
model << " " << w_context[vvnet.kmap[k]][d];
model << endl;
}
}
cout << "\tSave to <" << model_name << ">" << endl;
}
else
{
cout << "\tfail to open file" << endl;
}
}

void ICE::SaveEntityWeights(string model_name){

cout << "Save Model:" << endl;
ofstream model(model_name);
int i = 0;
if (model)
{
//model << vvnet.MAX_vid << " " << dim << endl;
for (auto k: vvnet.keys)
{
// cout << k << endl;
if (i < vocab_count){
i++;
continue;
}
if (vvnet.vertex[vvnet.kmap[k]].branch > 0)
{
model << k;
Expand All @@ -43,6 +103,10 @@ void ICE::SaveWeights(string model_name){
}
}

// Stores the current number of keys in vvnet.keys into the vocab_count
// member and returns that value.
//
// The original body had no return statement even though the function is
// declared to return int; using the result of such a call is undefined
// behaviour, so the stored count is now returned explicitly.
int ICE::getVocabCount()
{
    vocab_count = static_cast<int>(vvnet.keys.size());
    return vocab_count;
}

void ICE::Init(int dimension) {

cout << "Model Setting:" << endl;
Expand Down Expand Up @@ -70,7 +134,7 @@ void ICE::Init(int dimension) {
}


void ICE::Train(int sample_times, int negative_samples, double alpha, double alpha_min, int workers){
void ICE::TrainStage1(int sample_times, int negative_samples, double alpha, double alpha_min, int workers){

omp_set_num_threads(workers);

Expand Down Expand Up @@ -103,6 +167,7 @@ void ICE::Train(int sample_times, int negative_samples, double alpha, double alp
{
v1 = vvnet.SourceSample();
v2 = vvnet.TargetSample(v1);

vvnet.UpdatePair(w_vertex, w_context, v1, v2, dim, negative_samples, _alpha);

count++;
Expand All @@ -121,3 +186,57 @@ void ICE::Train(int sample_times, int negative_samples, double alpha, double alp

}

// Second training stage: samples (source, target) pairs from vvnet and calls
// vvnet.UpdateVertex only for pairs whose sampled source id is >= vocab_count.
//
// Parameters:
//   sample_times     - number of samples, in millions (multiplied by 1000000).
//   negative_samples - forwarded unchanged to vvnet.UpdateVertex.
//   alpha            - initial learning rate.
//   alpha_min        - lower bound the learning rate is clamped to.
//   workers          - OpenMP thread count; also the number of loop iterations
//                      the total sample budget is divided across.
//   vocab_count      - source-id threshold; note this parameter shadows the
//                      class member of the same name inside this function.
//
// NOTE(review): _alpha, current_sample and alpha_last are declared outside the
// parallel loop and written by every worker with no atomics or locks visible
// here — confirm this lock-free sharing is intentional.
void ICE::TrainStage2(int sample_times, int negative_samples, double alpha, double alpha_min, int workers, int vocab_count){

    omp_set_num_threads(workers);

    cout << "Model:" << endl;
    cout << "\t[ICE]" << endl;

    cout << "Learning Parameters:" << endl;
    cout << "\tsample_times:\t\t" << sample_times << " (*Million)" << endl;
    cout << "\tnegative_samples:\t" << negative_samples << endl;
    cout << "\talpha:\t\t\t" << alpha << endl;
    cout << "\tworkers:\t\t" << workers << endl;

    cout << "Start Training:" << endl;

    unsigned long long total_sample_times = (unsigned long long)sample_times*1000000;
    double _alpha = alpha;
    // Initialised so the final report below never prints an indeterminate
    // value when no monitor step is reached (e.g. very small sample budgets).
    double alpha_last = alpha;
    // Amount subtracted from the learning rate at each monitor step.
    double alpha_reduce = (alpha-alpha_min)/(total_sample_times/MONITOR);

    unsigned long long current_sample = 0;
    unsigned long long jobs = total_sample_times/workers;

    #pragma omp parallel for
    for (int worker=0; worker<workers; ++worker)
    {
        // count starts at 1, so each worker performs jobs-1 sampling rounds.
        unsigned long long count = 1;
        long v1, v2;

        while (count<jobs)
        {
            v1 = vvnet.SourceSample();
            v2 = vvnet.TargetSample(v1);

            // Pairs whose source id is below vocab_count are sampled but not trained on.
            if (v1 >= vocab_count){
                vvnet.UpdateVertex(w_vertex, w_context, v1, v2, dim, negative_samples, _alpha);
            }

            count++;
            if (count % MONITOR == 0)
            {
                _alpha -= alpha_reduce;
                current_sample += MONITOR;
                if (_alpha < alpha_min) _alpha = alpha_min;
                alpha_last = _alpha;
                // Character 13 is a carriage return: the progress line is redrawn in place.
                printf("\tAlpha: %.6f\tProgress: %.3f %%%c", _alpha, (double)(current_sample)/total_sample_times * 100, 13);
                fflush(stdout);
            }
        }
    }
    printf("\tAlpha: %.6f\tProgress: 100.00 %%\n", alpha_last);

}

10 changes: 8 additions & 2 deletions ICE/ICE.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,24 @@ class ICE {

// model parameters
int dim;
int vocab_count;
vector< vector<double> > w_vertex;
vector< vector<double> > w_context;

// data function
void LoadEdgeList(string);
void LoadWeights(string);
void LoadItemConceptList(string);
void LoadDict(unordered_map<string, unordered_map<string, double>>&);
void SaveWeights(string);
void SaveVertexWeights(string);
void SaveContextWeights(string);
void SaveEntityWeights(string);
int getVocabCount();

// model function
void Init(int);
void Train(int, int, double, double, int);
void TrainStage1(int, int, double, double, int);
void TrainStage2(int, int, double, double, int, int);

};

Expand Down
Loading