reading file line by line, skipping long lines when parsing
This commit is contained in:
parent
17c39cdc6a
commit
75ae416c52
@ -15,16 +15,21 @@ bool FileEntry::content_loaded() const
|
||||
return loaded;
|
||||
}
|
||||
|
||||
std::string FileEntry::load_content()
|
||||
std::vector<std::string> FileEntry::load_content()
|
||||
{
|
||||
std::ifstream ifs(file_entry.path());
|
||||
file_content.assign( std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>() );
|
||||
|
||||
for (std::string line; std::getline(ifs, line);)
|
||||
{
|
||||
file_content.emplace_back(line);
|
||||
}
|
||||
|
||||
loaded = true;
|
||||
|
||||
return file_content;
|
||||
}
|
||||
|
||||
std::string FileEntry::get_content() const
|
||||
std::vector<std::string> FileEntry::get_content() const
|
||||
{
|
||||
return file_content;
|
||||
}
|
||||
|
@ -18,13 +18,13 @@ class FileEntry {
|
||||
fs::path relative_path;
|
||||
|
||||
[[nodiscard]] bool content_loaded() const;
|
||||
std::string load_content();
|
||||
[[nodiscard]] std::string get_content() const;
|
||||
std::vector<std::string> load_content();
|
||||
[[nodiscard]] std::vector<std::string> get_content() const;
|
||||
void clear_content();
|
||||
|
||||
private:
|
||||
|
||||
std::string file_content;
|
||||
std::vector<std::string> file_content;
|
||||
bool loaded;
|
||||
};
|
||||
|
||||
|
@ -1,11 +1,15 @@
|
||||
#include "FileContext.hpp"
|
||||
|
||||
#include "../logging.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
FileContext::FileContext(std::shared_ptr<kc::FileEntry> entry)
|
||||
: file_entry(std::move(entry)), links_parsed(false) {
|
||||
}
|
||||
|
||||
// Exclusive upper bound on the length (as reported by std::string::length())
// of a line that the parse_* functions will run their regular expressions on.
// Lines of this length or longer are logged and skipped.
// NOTE(review): the value 3000 looks like an empirical cut-off - confirm the rationale.
constexpr int MAX_LINE_LENGTH = 3000;
|
||||
|
||||
void FileContext::parse()
|
||||
{
|
||||
if (!file_entry->content_loaded())
|
||||
@ -53,50 +57,86 @@ void FileContext::parse(const ParseOperations operations)
|
||||
}
|
||||
|
||||
void FileContext::parse_links() {
|
||||
const std::regex link_regex(MD_MD_LINK_REGEX);
|
||||
std::string file_content = file_entry->get_content();
|
||||
std::smatch link_match;
|
||||
while(std::regex_search(file_content, link_match, link_regex)) {
|
||||
const std::regex link_regex(MD_MD_LINK_REGEX, std::regex::optimize);
|
||||
const auto file_content = file_entry->get_content();
|
||||
|
||||
links.emplace_back(link_match.str());
|
||||
file_content = link_match.suffix();
|
||||
for (const auto& line : file_content) {
|
||||
if (line.length() < MAX_LINE_LENGTH) {
|
||||
std::string line_content = line;
|
||||
std::smatch link_match;
|
||||
while(std::regex_search(line_content, link_match, link_regex)) {
|
||||
|
||||
links.emplace_back(link_match.str());
|
||||
line_content = link_match.suffix();
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FileContext::parse_images() {
|
||||
const std::regex image_regex(MD_IMAGE_LINK_REGEX);
|
||||
std::string file_content = file_entry->get_content();
|
||||
std::smatch image_match;
|
||||
while(std::regex_search(file_content, image_match, image_regex)) {
|
||||
const std::regex image_regex(MD_IMAGE_LINK_REGEX, std::regex::optimize);
|
||||
const auto file_content = file_entry->get_content();
|
||||
|
||||
images.emplace_back(image_match.str());
|
||||
file_content = image_match.suffix();
|
||||
for (const auto& line : file_content) {
|
||||
if (line.length() < MAX_LINE_LENGTH) {
|
||||
std::string line_content = line;
|
||||
std::smatch image_match;
|
||||
while(std::regex_search(line_content, image_match, image_regex)) {
|
||||
|
||||
images.emplace_back(image_match.str());
|
||||
line_content = image_match.suffix();
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FileContext::parse_tags() {
|
||||
#if __APPLE__
|
||||
std::regex tag_regex(MD_TAG_REGEX, std::regex::multiline);
|
||||
std::regex tag_regex(MD_TAG_REGEX, std::regex::multiline | std::regex::optimize);
|
||||
#else
|
||||
std::regex tag_regex(MD_TAG_REGEX);
|
||||
std::regex tag_regex(MD_TAG_REGEX, std::regex::optimize);
|
||||
#endif
|
||||
std::string file_content = file_entry->get_content();
|
||||
std::smatch tag_match;
|
||||
while(std::regex_search(file_content, tag_match, tag_regex)) {
|
||||
const auto file_content = file_entry->get_content();
|
||||
|
||||
tags.push_back(tag_match.str().substr(1));
|
||||
file_content = tag_match.suffix();
|
||||
for (const auto& line : file_content) {
|
||||
if (line.length() < MAX_LINE_LENGTH) {
|
||||
std::string line_content = line;
|
||||
std::smatch tag_match;
|
||||
while(std::regex_search(line_content, tag_match, tag_regex)) {
|
||||
|
||||
tags.push_back(tag_match.str().substr(1));
|
||||
line_content = tag_match.suffix();
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FileContext::parse_tasks() {
|
||||
const std::regex task_regex(TASK_REGEX);
|
||||
std::string file_content = file_entry->get_content();
|
||||
std::smatch task_match;
|
||||
while(std::regex_search(file_content, task_match, task_regex)) {
|
||||
const std::regex task_regex(TASK_REGEX, std::regex::optimize);
|
||||
const auto file_content = file_entry->get_content();
|
||||
|
||||
tasks.emplace_back(task_match[2], task_match[1], task_match[3]);
|
||||
file_content = task_match.suffix();
|
||||
for (const auto& line : file_content) {
|
||||
if (line.length() < MAX_LINE_LENGTH) {
|
||||
std::string line_content = line;
|
||||
std::smatch task_match;
|
||||
while(std::regex_search(line_content, task_match, task_regex)) {
|
||||
|
||||
tasks.emplace_back(task_match[2], task_match[1], task_match[3]);
|
||||
line_content = task_match.suffix();
|
||||
}
|
||||
}
|
||||
else {
|
||||
BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,4 +145,4 @@ std::filesystem::path FileContext::abs_path(const kc::Link &link) const {
|
||||
return file_entry->file_entry.path().parent_path() / fs::path(link.link);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user