-
Notifications
You must be signed in to change notification settings - Fork 3.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support multiple train data on single machine #3900
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,11 +15,17 @@ namespace LightGBM { | |
|
||
class DatasetLoader { | ||
public: | ||
LIGHTGBM_EXPORT DatasetLoader(const Config& io_config, const PredictFunction& predict_fun, int num_class, const std::vector<const char*>& filenames); | ||
|
||
LIGHTGBM_EXPORT DatasetLoader(const Config& io_config, const PredictFunction& predict_fun, int num_class, const char* filename); | ||
|
||
LIGHTGBM_EXPORT ~DatasetLoader(); | ||
|
||
LIGHTGBM_EXPORT Dataset* LoadFromFile(const char* filename, int rank, int num_machines); | ||
LIGHTGBM_EXPORT Dataset* LoadFromFile(const std::vector<const char*>& filenames, int rank, int num_machines); | ||
|
||
// Single-file convenience overload: wraps `filename` in a one-element
// std::vector<const char*> and forwards to the multi-file LoadFromFile
// overload, passing `rank` and `num_machines` through unchanged.
LIGHTGBM_EXPORT Dataset* LoadFromFile(const char* filename, int rank, int num_machines) { | ||
return LoadFromFile(std::vector<const char*>{filename}, rank, num_machines); | ||
} | ||
|
||
LIGHTGBM_EXPORT Dataset* LoadFromFile(const char* filename) { | ||
return LoadFromFile(filename, 0, 1); | ||
|
@@ -40,25 +46,37 @@ class DatasetLoader { | |
const std::unordered_set<int>& categorical_features); | ||
|
||
private: | ||
Dataset* LoadFromBinFile(const char* data_filename, const char* bin_filename, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
Dataset* LoadFromBinFile(const std::vector<const char*>& data_filename, const char* bin_filename, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
|
||
void SetHeader(const char* filename); | ||
void SetHeader(const char* filenames); | ||
|
||
void CheckDataset(const Dataset* dataset, bool is_load_from_binary); | ||
|
||
std::vector<std::string> LoadTextDataToMemory(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
std::vector<std::string> LoadTextDataToMemory(const std::vector<const char*>& filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
|
||
// Single-file convenience overload: wraps `filename` in a one-element
// std::vector<const char*> and forwards to the multi-file
// LoadTextDataToMemory overload; every other argument is passed through
// unchanged and the callee's result is returned as-is.
std::vector<std::string> LoadTextDataToMemory(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) { | ||
return LoadTextDataToMemory(std::vector<const char*>{filename}, metadata, rank, num_machines, num_global_data, used_data_indices); | ||
} | ||
|
||
std::vector<std::string> SampleTextDataFromMemory(const std::vector<std::string>& data); | ||
|
||
std::vector<std::string> SampleTextDataFromFile(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
std::vector<std::string> SampleTextDataFromFile(const std::vector<const char*>& filenames, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices); | ||
|
||
std::vector<std::string> SampleTextDataFromFile(const char* filename, const Metadata& metadata, int rank, int num_machines, int* num_global_data, std::vector<data_size_t>* used_data_indices) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems that this function ( |
||
return SampleTextDataFromFile(std::vector<const char*>{filename}, metadata, rank, num_machines, num_global_data, used_data_indices); | ||
} | ||
|
||
void ConstructBinMappersFromTextData(int rank, int num_machines, const std::vector<std::string>& sample_data, const Parser* parser, Dataset* dataset); | ||
|
||
/*! \brief Extract local features from memory */ | ||
void ExtractFeaturesFromMemory(std::vector<std::string>* text_data, const Parser* parser, Dataset* dataset); | ||
|
||
/*! \brief Extract local features from file */ | ||
void ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset); | ||
void ExtractFeaturesFromFile(const std::vector<const char*>& filenames, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset); | ||
|
||
// Single-file convenience overload: wraps `filename` in a one-element
// std::vector<const char*> and forwards to the multi-file
// ExtractFeaturesFromFile overload with the same parser, used_data_indices
// and dataset.
void ExtractFeaturesFromFile(const char* filename, const Parser* parser, const std::vector<data_size_t>& used_data_indices, Dataset* dataset) { | ||
ExtractFeaturesFromFile(std::vector<const char*>{filename}, parser, used_data_indices, dataset); | ||
} | ||
|
||
/*! \brief Check can load from binary file */ | ||
std::string CheckCanLoadFromBin(const char* filename); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change `filenames` back to `filename`? Since only the first file is used for `SetHeader`.