reading file line by line, skipping long lines when parsing

This commit is contained in:
Andy Pack 2025-01-28 19:01:56 +00:00
parent 17c39cdc6a
commit 75ae416c52
Signed by: sarsoo
GPG Key ID: A55BA3536A5E0ED7
3 changed files with 77 additions and 32 deletions

@ -15,16 +15,21 @@ bool FileEntry::content_loaded() const
return loaded;
}
std::string FileEntry::load_content()
std::vector<std::string> FileEntry::load_content()
{
std::ifstream ifs(file_entry.path());
file_content.assign( std::istreambuf_iterator<char>(ifs), std::istreambuf_iterator<char>() );
for (std::string line; std::getline(ifs, line);)
{
file_content.emplace_back(line);
}
loaded = true;
return file_content;
}
std::string FileEntry::get_content() const
std::vector<std::string> FileEntry::get_content() const
{
return file_content;
}

@ -18,13 +18,13 @@ class FileEntry {
fs::path relative_path;
/// Reports the value of the `loaded` flag.
[[nodiscard]] bool content_loaded() const;
/// Reads the file line by line into `file_content` and returns a copy of the lines.
std::vector<std::string> load_content();
/// Returns a copy of the lines currently stored in `file_content`.
[[nodiscard]] std::vector<std::string> get_content() const;
// NOTE(review): definition not visible here; presumably drops the cached lines — confirm.
void clear_content();
private:
/// File content, one element per line.
std::vector<std::string> file_content;
/// Set by load_content(); defaulted so the flag is never read uninitialised.
bool loaded = false;
};

@ -1,11 +1,15 @@
#include "FileContext.hpp"
#include "../logging.hpp"
namespace kc {
FileContext::FileContext(std::shared_ptr<kc::FileEntry> entry)
: file_entry(std::move(entry)), links_parsed(false) {
}
// Exclusive upper bound on the length of a line that the parse_* functions
// will search: a line of this length or longer is logged at error level and
// skipped instead of being searched.
// NOTE(review): presumably chosen to keep std::regex away from pathological
// inputs — confirm the rationale for the value 3000.
constexpr int MAX_LINE_LENGTH = 3000;
void FileContext::parse()
{
if (!file_entry->content_loaded())
@ -53,50 +57,86 @@ void FileContext::parse(const ParseOperations operations)
}
/// Appends every match of MD_MD_LINK_REGEX found in the file content to `links`.
///
/// The content is taken from `file_entry->get_content()` and scanned one line
/// at a time. A line whose length is MAX_LINE_LENGTH or more is not searched;
/// it is logged at error level and skipped.
void FileContext::parse_links() {
    const std::regex link_regex(MD_MD_LINK_REGEX, std::regex::optimize);
    const auto file_content = file_entry->get_content();

    for (const auto& line : file_content) {
        if (line.length() >= MAX_LINE_LENGTH) {
            BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
            continue;
        }

        // Search the line in place, resuming right after the previous match,
        // instead of copying the remainder into a new string for every hit.
        // No match_prev_avail flag is passed, so anchors treat the resume
        // point as the start of input, exactly as a copied remainder would.
        std::smatch link_match;
        auto search_from = line.cbegin();
        while (std::regex_search(search_from, line.cend(), link_match, link_regex)) {
            links.emplace_back(link_match.str());
            search_from = link_match.suffix().first;
        }
    }
}
/// Appends every match of MD_IMAGE_LINK_REGEX found in the file content to `images`.
///
/// The content is taken from `file_entry->get_content()` and scanned one line
/// at a time. A line whose length is MAX_LINE_LENGTH or more is not searched;
/// it is logged at error level and skipped.
void FileContext::parse_images() {
    const std::regex image_regex(MD_IMAGE_LINK_REGEX, std::regex::optimize);
    const auto file_content = file_entry->get_content();

    for (const auto& line : file_content) {
        if (line.length() >= MAX_LINE_LENGTH) {
            BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
            continue;
        }

        // Search the line in place, resuming right after the previous match,
        // instead of copying the remainder into a new string for every hit.
        // No match_prev_avail flag is passed, so anchors treat the resume
        // point as the start of input, exactly as a copied remainder would.
        std::smatch image_match;
        auto search_from = line.cbegin();
        while (std::regex_search(search_from, line.cend(), image_match, image_regex)) {
            images.emplace_back(image_match.str());
            search_from = image_match.suffix().first;
        }
    }
}
/// Appends every match of MD_TAG_REGEX found in the file content to `tags`,
/// with the first character of each match dropped.
///
/// The content is taken from `file_entry->get_content()` and scanned one line
/// at a time. A line whose length is MAX_LINE_LENGTH or more is not searched;
/// it is logged at error level and skipped.
void FileContext::parse_tags() {
    // The multiline flag is only requested when building for Apple platforms.
#if __APPLE__
    const std::regex tag_regex(MD_TAG_REGEX, std::regex::multiline | std::regex::optimize);
#else
    const std::regex tag_regex(MD_TAG_REGEX, std::regex::optimize);
#endif
    const auto file_content = file_entry->get_content();

    for (const auto& line : file_content) {
        if (line.length() >= MAX_LINE_LENGTH) {
            BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
            continue;
        }

        // Search the line in place, resuming right after the previous match,
        // instead of copying the remainder into a new string for every hit.
        // No match_prev_avail flag is passed, so anchors treat the resume
        // point as the start of input, exactly as a copied remainder would.
        std::smatch tag_match;
        auto search_from = line.cbegin();
        while (std::regex_search(search_from, line.cend(), tag_match, tag_regex)) {
            // substr(1) strips the leading character of the matched text.
            tags.push_back(tag_match.str().substr(1));
            search_from = tag_match.suffix().first;
        }
    }
}
/// Appends one entry to `tasks` for every match of TASK_REGEX in the file content.
///
/// Each entry is constructed from capture groups 2, 1 and 3 of the match, in
/// that order. The content is taken from `file_entry->get_content()` and
/// scanned one line at a time. A line whose length is MAX_LINE_LENGTH or more
/// is not searched; it is logged at error level and skipped.
void FileContext::parse_tasks() {
    const std::regex task_regex(TASK_REGEX, std::regex::optimize);
    const auto file_content = file_entry->get_content();

    for (const auto& line : file_content) {
        if (line.length() >= MAX_LINE_LENGTH) {
            BOOST_LOG_TRIVIAL(error) << "Skipping line because too long: " << line;
            continue;
        }

        // Search the line in place, resuming right after the previous match,
        // instead of copying the remainder into a new string for every hit.
        // No match_prev_avail flag is passed, so anchors treat the resume
        // point as the start of input, exactly as a copied remainder would.
        std::smatch task_match;
        auto search_from = line.cbegin();
        while (std::regex_search(search_from, line.cend(), task_match, task_regex)) {
            // The element is built before search_from moves on, while the
            // sub-matches still refer to the current match.
            tasks.emplace_back(task_match[2], task_match[1], task_match[3]);
            search_from = task_match.suffix().first;
        }
    }
}
@ -105,4 +145,4 @@ std::filesystem::path FileContext::abs_path(const kc::Link &link) const {
return file_entry->file_entry.path().parent_path() / fs::path(link.link);
}
}
}