parsing links and tags

This commit is contained in:
Andy Pack 2023-06-09 23:31:07 +01:00
parent 3d63ff29a7
commit e7dd0a7886
Signed by: sarsoo
GPG Key ID: A55BA3536A5E0ED7
15 changed files with 227 additions and 12 deletions

View File

@ -2,6 +2,9 @@
main.cpp main.cpp
fs/fs.cpp fs/fs.cpp
fs/FileEntry.cpp fs/FileEntry.cpp
parse/Link.cpp
parse/FileContext.cpp
parse/FileContextCache.cpp
logging.cpp logging.cpp
config.cpp config.cpp
) )

View File

@ -14,6 +14,7 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
("help", "produce help message") ("help", "produce help message")
("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base") ("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base")
("config", po::value<std::string>()->default_value("kc.ini"), "config file location") ("config", po::value<std::string>()->default_value("kc.ini"), "config file location")
("index", po::value<int>()->default_value(1), "index")
; ;
po::options_description cmdline_options; po::options_description cmdline_options;

View File

@ -1 +1,8 @@
#pragma once #pragma once
#include <string>
// Regular-expression sources (ECMAScript grammar, the std::regex default) for
// the Markdown constructs extracted from knowledge-base files.
//
// The link patterns use negated character classes instead of lazy `.*?`.
// With `.*?` a search that starts at one link's `[` may keep extending past
// that link's closing `]` / `)` until it finds the required suffix, so
// "[a](x.com) and [b](n.md)" was reported as ONE markdown link. `\n` is
// excluded from the classes so a match still cannot cross a line break,
// which is what `.` guaranteed before.
//
// `inline` gives one shared definition across all translation units that
// include this header instead of a private copy per .cpp file.

// Any inline link: [text](target)
inline const std::string MD_LINK_REGEX = R"(\[[^\]\n]*\]\([^)\n]*\))";
// Inline link whose target ends in ".md": [text](target.md)
inline const std::string MD_MD_LINK_REGEX = R"(\[[^\]\n]*\]\([^)\n]*\.md\))";
// Image whose target ends in ".png": ![alt](target.png)
inline const std::string MD_IMAGE_LINK_REGEX = R"(!\[[^\]\n]*\]\([^)\n]*\.png\))";
// A single '#' followed by one or more characters that are not whitespace, '#' or '.'
inline const std::string MD_TAG_REGEX = R"(#{1}[^\s#.]+)";

View File

@ -5,15 +5,22 @@
namespace kc { namespace kc {
// Wraps a directory entry. No file content is read here, so the entry starts
// out in the "not loaded" state.
FileEntry::FileEntry(fs::directory_entry entry)
    : file_entry(entry)
    , loaded(false) // without this, content_loaded() would read an indeterminate bool
{
}
bool FileEntry::content_loaded() bool FileEntry::content_loaded()
{ {
return !file_content.empty(); return loaded;
} }
std::string FileEntry::load_content() std::string FileEntry::load_content()
{ {
std::ifstream ifs(file_entry.path()); std::ifstream ifs(file_entry.path());
file_content.assign( (std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()) ); file_content.assign( (std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()) );
loaded = true;
return file_content; return file_content;
} }
@ -27,6 +34,7 @@ void FileEntry::clear_content()
{ {
file_content.clear(); file_content.clear();
file_content.shrink_to_fit(); file_content.shrink_to_fit();
loaded = false;
} }
} }

View File

@ -2,6 +2,9 @@
#include <string> #include <string>
#include <filesystem> #include <filesystem>
#include "../parse/Link.hpp"
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace kc { namespace kc {
@ -9,6 +12,8 @@ namespace kc {
class FileEntry { class FileEntry {
public: public:
FileEntry(fs::directory_entry entry);
fs::directory_entry file_entry; fs::directory_entry file_entry;
fs::path relative_path; fs::path relative_path;
@ -20,6 +25,7 @@ class FileEntry {
private: private:
std::string file_content; std::string file_content;
bool loaded;
}; };
} }

View File

@ -28,9 +28,8 @@ std::vector<kc::FileEntry> kc::walk_dir(std::string dir)
if (!excluded) if (!excluded)
{ {
auto entry = kc::FileEntry(); auto entry = kc::FileEntry(dir_entry);
entry.file_entry = dir_entry;
entry.relative_path = fs::relative(dir_entry_path, base_path); entry.relative_path = fs::relative(dir_entry_path, base_path);
matched.push_back(entry); matched.push_back(entry);

View File

@ -1,12 +1,5 @@
#include "logging.hpp" #include "logging.hpp"
#include <boost/log/core.hpp>
#include <boost/log/trivial.hpp>
#include <boost/log/expressions.hpp>
#include <boost/log/utility/setup/file.hpp>
#include <boost/log/utility/setup/common_attributes.hpp>
#include <boost/log/utility/setup/console.hpp>
namespace logging = boost::log; namespace logging = boost::log;
namespace src = boost::log::sources; namespace src = boost::log::sources;
namespace sinks = boost::log::sinks; namespace sinks = boost::log::sinks;
@ -27,7 +20,7 @@ void init_logging()
logging::core::get()->set_filter logging::core::get()->set_filter
( (
logging::trivial::severity >= logging::trivial::info logging::trivial::severity >= logging::trivial::debug
); );
logging::add_common_attributes(); logging::add_common_attributes();

View File

@ -1,3 +1,10 @@
#pragma once #pragma once
#include <boost/log/core.hpp>
#include <boost/log/trivial.hpp>
#include <boost/log/expressions.hpp>
#include <boost/log/utility/setup/file.hpp>
#include <boost/log/utility/setup/common_attributes.hpp>
#include <boost/log/utility/setup/console.hpp>
void init_logging(); void init_logging();

View File

@ -3,11 +3,13 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <regex>
#include "const.hpp" #include "const.hpp"
#include "logging.hpp" #include "logging.hpp"
#include "config.hpp" #include "config.hpp"
#include "fs/fs.hpp" #include "fs/fs.hpp"
#include "parse/FileContextCache.hpp"
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
@ -25,6 +27,21 @@ int main(int argc, const char *argv[]) {
auto env_path = (*config)["path"].as<std::string>(); auto env_path = (*config)["path"].as<std::string>();
BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path; BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path;
auto entries = kc::walk_dir(env_path); auto file_cache = kc::FileContextCache();
file_cache.load(env_path);
file_cache.parse_all();
auto context = file_cache.get()[(*config)["index"].as<int>()];
std::cout << context->file_entry.get_content() << std::endl << std::endl << std::endl;
std::cout << "links: " << context->links.size() << std::endl;
std::cout << "tags: " << context->tags.size() << std::endl << std::endl << std::endl;;
for (auto link : context->links)
{
std::cout << link.original_form << std::endl;
}
} }
} }

41
src/parse/FileContext.cpp Normal file
View File

@ -0,0 +1,41 @@
#include "FileContext.hpp"
namespace kc {
// Builds a context around a copy of the given file entry. Nothing is parsed
// yet: `links` and `tags` start empty and the parsed flag starts false.
FileContext::FileContext(kc::FileEntry entry)
    : file_entry(entry)
    , links_parsed(false) // previously left uninitialised
{
}
// Scans the loaded file content for Markdown links to .md files and for
// tags, replacing whatever `links` and `tags` held before.
//
// Throws std::logic_error if the file entry's content has not been loaded.
void FileContext::parse()
{
    if (!file_entry.content_loaded())
    {
        throw std::logic_error("cannot parse from file entry as it has not been loaded");
    }

    // Compiling a std::regex is expensive; build each pattern once and reuse
    // it for every file instead of recompiling on every call.
    static const std::regex link_regex(MD_MD_LINK_REGEX);
    static const std::regex tag_regex(MD_TAG_REGEX);

    links.clear();
    tags.clear();

    // Keep the content in a named local: the regex iterators below point into
    // this string, so it has to outlive them.
    const std::string file_content = file_entry.get_content();
    const std::sregex_iterator no_more_matches;

    // Walk successive non-overlapping matches in place rather than copying
    // the remaining text into a fresh string after every match.
    for (std::sregex_iterator match(file_content.begin(), file_content.end(), link_regex);
         match != no_more_matches; ++match)
    {
        links.emplace_back(match->str());
    }

    for (std::sregex_iterator match(file_content.begin(), file_content.end(), tag_regex);
         match != no_more_matches; ++match)
    {
        tags.emplace_back(match->str());
    }
}
}

28
src/parse/FileContext.hpp Normal file
View File

@ -0,0 +1,28 @@
#pragma once
#include <vector>
#include <regex>
#include "../fs/FileEntry.hpp"
#include "Link.hpp"
#include "../const.hpp"
namespace kc {
// A file entry together with the links and tags extracted from its content.
class FileContext {
public:
    // Stores its own copy of `entry`; parsing is a separate, explicit step.
    FileContext(kc::FileEntry entry);

    // The file this context describes.
    kc::FileEntry file_entry;
    // Links found by the most recent parse(); empty until then.
    std::vector<kc::Link> links;
    // Tags (matched text, including the leading '#') found by the most recent parse().
    std::vector<std::string> tags;

    // Re-populates `links` and `tags` from file_entry's loaded content.
    // Throws std::logic_error when that content has not been loaded.
    void parse();

private:
    // Default-initialised so the flag never holds an indeterminate value.
    // NOTE(review): nothing visible reads or sets this flag yet - confirm intended use.
    bool links_parsed = false;
};
}

View File

@ -0,0 +1,53 @@
#include "FileContextCache.hpp"

#include <utility>

#include "../fs/fs.hpp"
#include "../logging.hpp"
namespace kc {
// Walks `root_path` and appends one FileContext per entry returned by
// kc::walk_dir. Content is read from disk only for entries whose relative
// path has the ".md" extension. Existing cache contents are kept; the debug
// line reports the total cache size after the load.
void FileContextCache::load(std::string root_path)
{
    BOOST_LOG_TRIVIAL(trace) << "Beginning cache load";

    auto entries = kc::walk_dir(root_path);
    file_contexts.reserve(file_contexts.size() + entries.size());

    // Iterate by reference and move each entry out: `entries` is discarded
    // right after this loop, so there is no reason to deep-copy every entry
    // (and the file content it has just loaded) along the way.
    for (auto& entry : entries)
    {
        if (entry.relative_path.extension() == ".md")
        {
            entry.load_content();
        }
        file_contexts.push_back(std::make_shared<kc::FileContext>(std::move(entry)));
    }

    BOOST_LOG_TRIVIAL(debug) << "Loaded " << size() << " entries";
}
// Parses every cached context whose relative path has the ".md" extension;
// all other contexts are left untouched.
void FileContextCache::parse_all()
{
    // Bind by const reference: copying a shared_ptr per iteration only adds
    // atomic reference-count traffic.
    for (const auto& context : file_contexts)
    {
        if (context->file_entry.relative_path.extension() != ".md")
        {
            continue;
        }
        context->parse();
    }
}
// Drops every cached context and gives the vector's storage back.
void FileContextCache::clear()
{
    // Swapping with an empty temporary empties the vector and releases its
    // capacity in a single step.
    decltype(file_contexts)().swap(file_contexts);
}
// Number of contexts currently held in the cache.
size_t FileContextCache::size()
{
    const size_t context_count = file_contexts.size();
    return context_count;
}
// Returns a copy of the cache's vector. Only the shared pointers are copied;
// the FileContext objects they point to are shared with the cache.
std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get()
{
    auto snapshot = file_contexts;
    return snapshot;
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <vector>
#include <memory>
#include "FileContext.hpp"
namespace kc {
// Owns the FileContext objects built from a directory walk.
class FileContextCache {
public:
    // Appends a context for every entry found under `root_path`.
    void load(std::string root_path);
    // Runs FileContext::parse() on the cached ".md" contexts.
    void parse_all();

    // Returns the cached contexts as a vector of shared pointers (by value).
    std::vector<std::shared_ptr<kc::FileContext>> get();
    // Number of cached contexts.
    size_t size();

    // Empties the cache.
    void clear();

private:
    std::vector<std::shared_ptr<kc::FileContext>> file_contexts;
};
}

11
src/parse/Link.cpp Normal file
View File

@ -0,0 +1,11 @@
#include "Link.hpp"
namespace kc {
Link::Link(std::string original)
: original_form(original)
{
}
}

20
src/parse/Link.hpp Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <string>
namespace kc {
// One link extracted from a file's content.
class Link {
public:
    // Stores `original` as original_form.
    Link(std::string original);

    // The link text exactly as it was matched in the source file.
    std::string original_form;
    // NOTE(review): presumably the link's display text - not set by the
    // constructor; confirm once link decomposition is implemented.
    std::string display;
    // NOTE(review): presumably the link target - not set by the constructor.
    std::string link;
    // NOTE(review): presumably a fragment within the target - not set by the constructor.
    std::string sublink;
};
}