parsing links and tags
This commit is contained in:
parent
3d63ff29a7
commit
e7dd0a7886
@ -2,6 +2,9 @@
|
||||
main.cpp
|
||||
fs/fs.cpp
|
||||
fs/FileEntry.cpp
|
||||
parse/Link.cpp
|
||||
parse/FileContext.cpp
|
||||
parse/FileContextCache.cpp
|
||||
logging.cpp
|
||||
config.cpp
|
||||
)
|
||||
|
@ -14,6 +14,7 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
|
||||
("help", "produce help message")
|
||||
("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base")
|
||||
("config", po::value<std::string>()->default_value("kc.ini"), "config file location")
|
||||
("index", po::value<int>()->default_value(1), "index")
|
||||
;
|
||||
|
||||
po::options_description cmdline_options;
|
||||
|
@ -1 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
// Regular-expression sources used when scanning Markdown text. The patterns
// are passed to std::regex elsewhere in the project.
//
// Declared `inline` (C++17) so that every translation unit including this
// header refers to one shared object instead of building a private copy.

// "[text](target)" with any target.
inline const std::string MD_LINK_REGEX = R"(\[.*?\]\(.*?\))";

// "[text](target.md)": a link whose target ends in ".md".
inline const std::string MD_MD_LINK_REGEX = R"(\[.*?\]\(.*?\.md\))";

// "![text](target.png)": an image link whose target ends in ".png".
inline const std::string MD_IMAGE_LINK_REGEX = R"(!\[.*?\]\(.*?\.png\))";

// A single '#' followed by one or more characters that are not whitespace,
// '#' or '.'.
inline const std::string MD_TAG_REGEX = R"(#{1}[^\s#.]+)";
|
@ -5,15 +5,22 @@
|
||||
|
||||
namespace kc {
|
||||
|
||||
FileEntry::FileEntry(fs::directory_entry entry)
|
||||
: file_entry(entry)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
bool FileEntry::content_loaded()
|
||||
{
|
||||
return !file_content.empty();
|
||||
return loaded;
|
||||
}
|
||||
|
||||
std::string FileEntry::load_content()
|
||||
{
|
||||
std::ifstream ifs(file_entry.path());
|
||||
file_content.assign( (std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()) );
|
||||
loaded = true;
|
||||
|
||||
return file_content;
|
||||
}
|
||||
@ -27,6 +34,7 @@ void FileEntry::clear_content()
|
||||
{
|
||||
file_content.clear();
|
||||
file_content.shrink_to_fit();
|
||||
loaded = false;
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,9 @@
|
||||
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
|
||||
#include "../parse/Link.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace kc {
|
||||
@ -9,6 +12,8 @@ namespace kc {
|
||||
class FileEntry {
|
||||
public:
|
||||
|
||||
FileEntry(fs::directory_entry entry);
|
||||
|
||||
fs::directory_entry file_entry;
|
||||
fs::path relative_path;
|
||||
|
||||
@ -20,6 +25,7 @@ class FileEntry {
|
||||
private:
|
||||
|
||||
std::string file_content;
|
||||
bool loaded;
|
||||
};
|
||||
|
||||
}
|
@ -28,9 +28,8 @@ std::vector<kc::FileEntry> kc::walk_dir(std::string dir)
|
||||
|
||||
if (!excluded)
|
||||
{
|
||||
auto entry = kc::FileEntry();
|
||||
auto entry = kc::FileEntry(dir_entry);
|
||||
|
||||
entry.file_entry = dir_entry;
|
||||
entry.relative_path = fs::relative(dir_entry_path, base_path);
|
||||
|
||||
matched.push_back(entry);
|
||||
|
@ -1,12 +1,5 @@
|
||||
#include "logging.hpp"
|
||||
|
||||
#include <boost/log/core.hpp>
|
||||
#include <boost/log/trivial.hpp>
|
||||
#include <boost/log/expressions.hpp>
|
||||
#include <boost/log/utility/setup/file.hpp>
|
||||
#include <boost/log/utility/setup/common_attributes.hpp>
|
||||
#include <boost/log/utility/setup/console.hpp>
|
||||
|
||||
namespace logging = boost::log;
|
||||
namespace src = boost::log::sources;
|
||||
namespace sinks = boost::log::sinks;
|
||||
@ -27,7 +20,7 @@ void init_logging()
|
||||
|
||||
logging::core::get()->set_filter
|
||||
(
|
||||
logging::trivial::severity >= logging::trivial::info
|
||||
logging::trivial::severity >= logging::trivial::debug
|
||||
);
|
||||
|
||||
logging::add_common_attributes();
|
||||
|
@ -1,3 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/log/core.hpp>
|
||||
#include <boost/log/trivial.hpp>
|
||||
#include <boost/log/expressions.hpp>
|
||||
#include <boost/log/utility/setup/file.hpp>
|
||||
#include <boost/log/utility/setup/common_attributes.hpp>
|
||||
#include <boost/log/utility/setup/console.hpp>
|
||||
|
||||
void init_logging();
|
19
src/main.cpp
19
src/main.cpp
@ -3,11 +3,13 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
|
||||
#include "const.hpp"
|
||||
#include "logging.hpp"
|
||||
#include "config.hpp"
|
||||
#include "fs/fs.hpp"
|
||||
#include "parse/FileContextCache.hpp"
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
||||
@ -25,6 +27,21 @@ int main(int argc, const char *argv[]) {
|
||||
auto env_path = (*config)["path"].as<std::string>();
|
||||
BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path;
|
||||
|
||||
auto entries = kc::walk_dir(env_path);
|
||||
auto file_cache = kc::FileContextCache();
|
||||
file_cache.load(env_path);
|
||||
file_cache.parse_all();
|
||||
|
||||
auto context = file_cache.get()[(*config)["index"].as<int>()];
|
||||
|
||||
std::cout << context->file_entry.get_content() << std::endl << std::endl << std::endl;
|
||||
|
||||
std::cout << "links: " << context->links.size() << std::endl;
|
||||
std::cout << "tags: " << context->tags.size() << std::endl << std::endl << std::endl;;
|
||||
|
||||
for (auto link : context->links)
|
||||
{
|
||||
std::cout << link.original_form << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
41
src/parse/FileContext.cpp
Normal file
41
src/parse/FileContext.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include "FileContext.hpp"

#include <stdexcept>
#include <utility>
|
||||
|
||||
namespace kc {
|
||||
|
||||
/// Builds a context around `entry`. `links` and `tags` start out empty and
/// are only filled by parse().
FileContext::FileContext(kc::FileEntry entry)
    // `entry` is already this constructor's own copy, so hand it over to the
    // member instead of copying it a second time.
    : file_entry(std::move(entry))
{
}
|
||||
|
||||
void FileContext::parse()
|
||||
{
|
||||
if (!file_entry.content_loaded())
|
||||
{
|
||||
throw std::logic_error("cannot parse from file entry as it has not been loaded");
|
||||
}
|
||||
|
||||
links.clear();
|
||||
tags.clear();
|
||||
|
||||
std::regex link_regex(MD_MD_LINK_REGEX);
|
||||
std::string file_content = file_entry.get_content();
|
||||
std::smatch link_match;
|
||||
while(std::regex_search(file_content, link_match, link_regex)) {
|
||||
|
||||
links.push_back(kc::Link(link_match.str()));
|
||||
file_content = link_match.suffix();
|
||||
}
|
||||
|
||||
std::regex tag_regex(MD_TAG_REGEX);
|
||||
file_content = file_entry.get_content();
|
||||
std::smatch tag_match;
|
||||
while(std::regex_search(file_content, tag_match, tag_regex)) {
|
||||
|
||||
tags.push_back(tag_match.str());
|
||||
file_content = tag_match.suffix();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
28
src/parse/FileContext.hpp
Normal file
28
src/parse/FileContext.hpp
Normal file
@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <regex>
|
||||
|
||||
#include "../fs/FileEntry.hpp"
|
||||
#include "Link.hpp"
|
||||
#include "../const.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
class FileContext {
|
||||
public:
|
||||
|
||||
FileContext(kc::FileEntry entry);
|
||||
|
||||
kc::FileEntry file_entry;
|
||||
std::vector<kc::Link> links;
|
||||
std::vector<std::string> tags;
|
||||
|
||||
void parse();
|
||||
|
||||
private:
|
||||
|
||||
bool links_parsed;
|
||||
};
|
||||
|
||||
}
|
53
src/parse/FileContextCache.cpp
Normal file
53
src/parse/FileContextCache.cpp
Normal file
@ -0,0 +1,53 @@
|
||||
#include "FileContextCache.hpp"
|
||||
#include "../fs/fs.hpp"
|
||||
#include "../logging.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
void FileContextCache::load(std::string root_path)
|
||||
{
|
||||
BOOST_LOG_TRIVIAL(trace) << "Beginning cache load";
|
||||
|
||||
auto entries = kc::walk_dir(root_path);
|
||||
|
||||
for (auto entry : entries)
|
||||
{
|
||||
if (entry.relative_path.extension() == ".md")
|
||||
{
|
||||
entry.load_content();
|
||||
}
|
||||
|
||||
file_contexts.push_back(std::make_shared<kc::FileContext>(entry));
|
||||
}
|
||||
|
||||
BOOST_LOG_TRIVIAL(debug) << "Loaded " << size() << " entries";
|
||||
}
|
||||
|
||||
void FileContextCache::parse_all()
|
||||
{
|
||||
for (auto context: file_contexts)
|
||||
{
|
||||
if (context->file_entry.relative_path.extension() == ".md")
|
||||
{
|
||||
context->parse();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FileContextCache::clear()
|
||||
{
|
||||
file_contexts.clear();
|
||||
file_contexts.shrink_to_fit();
|
||||
}
|
||||
|
||||
size_t FileContextCache::size()
|
||||
{
|
||||
return file_contexts.size();
|
||||
}
|
||||
|
||||
/// Returns a copy of the list of cached contexts. The shared pointers in the
/// copy refer to the same FileContext objects as the cache does.
std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get()
{
    std::vector<std::shared_ptr<kc::FileContext>> snapshot(file_contexts);
    return snapshot;
}
|
||||
|
||||
}
|
21
src/parse/FileContextCache.hpp
Normal file
21
src/parse/FileContextCache.hpp
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "FileContext.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
class FileContextCache {
|
||||
public:
|
||||
void load(std::string root_path);
|
||||
void clear();
|
||||
size_t size();
|
||||
std::vector<std::shared_ptr<kc::FileContext>> get();
|
||||
void parse_all();
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<kc::FileContext>> file_contexts;
|
||||
};
|
||||
|
||||
}
|
11
src/parse/Link.cpp
Normal file
11
src/parse/Link.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include "Link.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
Link::Link(std::string original)
|
||||
: original_form(original)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
20
src/parse/Link.hpp
Normal file
20
src/parse/Link.hpp
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace kc {
|
||||
|
||||
/// A link found in a file, kept together with the exact text it was built from.
class Link {
public:
    /// Stores `original` in `original_form`.
    Link(std::string original);

    /// The text passed to the constructor.
    std::string original_form;

    // NOTE(review): nothing visible in this change fills the three fields
    // below; the names suggest the label, the target and a sub-target of the
    // link. Confirm before relying on them.
    std::string display;
    std::string link;
    std::string sublink;

private:
};
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user