Compare commits
No commits in common. "13aee5d6f555d240d5ded597350ee49bb3ad2cd4" and "da4b330098a6e8095aea68a15e5443264959c655" have entirely different histories.
13aee5d6f5
...
da4b330098
@ -13,7 +13,7 @@ project(kc
|
||||
VERSION
|
||||
1.0
|
||||
DESCRIPTION
|
||||
"Knowledge crawler for analysing notes"
|
||||
"C++ scratchpad"
|
||||
LANGUAGES
|
||||
CXX)
|
||||
|
||||
|
@ -5,7 +5,6 @@
|
||||
parse/Link.cpp
|
||||
parse/FileContext.cpp
|
||||
parse/FileContextCache.cpp
|
||||
valid/link.cpp
|
||||
logging.cpp
|
||||
config.cpp
|
||||
)
|
||||
|
@ -14,14 +14,9 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
|
||||
("help", "produce help message")
|
||||
("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base")
|
||||
("config", po::value<std::string>()->default_value("kc.ini"), "config file location")
|
||||
("command", po::value<std::string>(), "command to execute")
|
||||
("subargs", po::value<std::vector<std::string> >(), "Arguments for command")
|
||||
("index", po::value<int>()->default_value(1), "index")
|
||||
;
|
||||
|
||||
po::positional_options_description pos;
|
||||
pos.add("command", 1).add("subargs", -1);
|
||||
|
||||
po::options_description cmdline_options;
|
||||
cmdline_options.add(desc);
|
||||
|
||||
@ -35,8 +30,6 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
|
||||
auto vm = std::make_shared<po::variables_map>();
|
||||
po::store(po::command_line_parser(argc, argv)
|
||||
.options(cmdline_options)
|
||||
.positional(pos)
|
||||
// .allow_unregistered()
|
||||
.run(),
|
||||
*vm);
|
||||
|
||||
|
@ -11,7 +11,7 @@ FileEntry::FileEntry(fs::directory_entry entry)
|
||||
|
||||
}
|
||||
|
||||
bool FileEntry::content_loaded() const
|
||||
bool FileEntry::content_loaded()
|
||||
{
|
||||
return loaded;
|
||||
}
|
||||
@ -25,7 +25,7 @@ std::string FileEntry::load_content()
|
||||
return file_content;
|
||||
}
|
||||
|
||||
std::string FileEntry::get_content() const
|
||||
std::string FileEntry::get_content()
|
||||
{
|
||||
return file_content;
|
||||
}
|
||||
|
@ -17,9 +17,9 @@ class FileEntry {
|
||||
fs::directory_entry file_entry;
|
||||
fs::path relative_path;
|
||||
|
||||
bool content_loaded() const;
|
||||
bool content_loaded();
|
||||
std::string load_content();
|
||||
std::string get_content() const;
|
||||
std::string get_content();
|
||||
void clear_content();
|
||||
|
||||
private:
|
||||
|
@ -5,9 +5,9 @@
|
||||
|
||||
static const std::string exclusions[] = {".git", ".obsidian"};
|
||||
|
||||
std::vector<std::shared_ptr<kc::FileEntry>> kc::walk_dir(const std::string dir)
|
||||
std::vector<kc::FileEntry> kc::walk_dir(std::string dir)
|
||||
{
|
||||
auto matched = std::vector<std::shared_ptr<kc::FileEntry>>();
|
||||
auto matched = std::vector<kc::FileEntry>();
|
||||
auto base_path = fs::path(dir);
|
||||
|
||||
for (auto const& dir_entry : fs::recursive_directory_iterator(base_path))
|
||||
@ -23,15 +23,14 @@ std::vector<std::shared_ptr<kc::FileEntry>> kc::walk_dir(const std::string dir)
|
||||
if (dir_entry_path_string.contains(exclusion))
|
||||
{
|
||||
excluded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!excluded)
|
||||
{
|
||||
auto entry = std::make_shared<kc::FileEntry>(dir_entry);
|
||||
auto entry = kc::FileEntry(dir_entry);
|
||||
|
||||
entry->relative_path = fs::relative(dir_entry_path, base_path);
|
||||
entry.relative_path = fs::relative(dir_entry_path, base_path);
|
||||
|
||||
matched.push_back(entry);
|
||||
}
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "FileEntry.hpp"
|
||||
|
||||
@ -11,6 +10,6 @@ namespace fs = std::filesystem;
|
||||
|
||||
namespace kc {
|
||||
|
||||
std::vector<std::shared_ptr<kc::FileEntry>> walk_dir(std::string dir);
|
||||
std::vector<kc::FileEntry> walk_dir(std::string dir);
|
||||
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
#include "logging.hpp"
|
||||
|
||||
|
||||
namespace logging = boost::log;
|
||||
namespace src = boost::log::sources;
|
||||
namespace sinks = boost::log::sinks;
|
||||
@ -14,11 +13,10 @@ void init_logging()
|
||||
(
|
||||
keywords::file_name = "kc_%N.log",
|
||||
keywords::time_based_rotation = sinks::file::rotation_at_time_point(0, 0, 0),
|
||||
keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%",
|
||||
keywords::open_mode = std::ios::app
|
||||
keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%"
|
||||
);
|
||||
|
||||
// logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%");
|
||||
logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%");
|
||||
|
||||
logging::core::get()->set_filter
|
||||
(
|
||||
|
@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <boost/log/core.hpp>
|
||||
#include <boost/log/trivial.hpp>
|
||||
#include <boost/log/expressions.hpp>
|
||||
@ -9,16 +7,4 @@
|
||||
#include <boost/log/utility/setup/common_attributes.hpp>
|
||||
#include <boost/log/utility/setup/console.hpp>
|
||||
|
||||
void init_logging();
|
||||
|
||||
inline void print_and_log(std::string log_line)
|
||||
{
|
||||
BOOST_LOG_TRIVIAL(info) << log_line;
|
||||
std::cout << log_line << std::endl;
|
||||
}
|
||||
|
||||
inline void print_and_log_error(std::string log_line)
|
||||
{
|
||||
BOOST_LOG_TRIVIAL(error) << log_line;
|
||||
std::cout << "ERROR: " << log_line << std::endl;
|
||||
}
|
||||
void init_logging();
|
60
src/main.cpp
60
src/main.cpp
@ -10,10 +10,6 @@
|
||||
#include "config.hpp"
|
||||
#include "fs/fs.hpp"
|
||||
#include "parse/FileContextCache.hpp"
|
||||
#include "valid/link.hpp"
|
||||
|
||||
void run_validate(std::shared_ptr<boost::program_options::variables_map> config);
|
||||
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
||||
@ -22,41 +18,45 @@ int main(int argc, const char *argv[]) {
|
||||
BOOST_LOG_TRIVIAL(info) << "================================";
|
||||
BOOST_LOG_TRIVIAL(info) << " kc";
|
||||
BOOST_LOG_TRIVIAL(info) << "================================";
|
||||
BOOST_LOG_TRIVIAL(info) << "Starting up....";
|
||||
BOOST_LOG_TRIVIAL(info) << "starting up....";
|
||||
|
||||
auto config = init_config(argc, argv);
|
||||
|
||||
if(config)
|
||||
{
|
||||
if (config->count("command") == 1)
|
||||
{
|
||||
auto command = (*config)["command"].as<std::string>();
|
||||
auto env_path = (*config)["path"].as<std::string>();
|
||||
BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path;
|
||||
|
||||
if (command == "validate")
|
||||
auto file_cache = kc::FileContextCache();
|
||||
file_cache.load(env_path);
|
||||
file_cache.parse_all();
|
||||
|
||||
auto context = file_cache.get()[(*config)["index"].as<int>()];
|
||||
|
||||
std::cout << context->file_entry->get_content() << std::endl << std::endl << std::endl;
|
||||
|
||||
std::cout << "links: " << context->links.size() << std::endl;
|
||||
std::cout << "images: " << context->images.size() << std::endl;
|
||||
std::cout << "tags: " << context->tags.size() << std::endl << std::endl << std::endl;;
|
||||
|
||||
for (auto link : context->links)
|
||||
{
|
||||
std::cout << link.original_form << " " << link.display << " --- " << link.link << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "tag cache: " << file_cache.tag_map.size() << std::endl;
|
||||
|
||||
for (auto tag : file_cache.tag_map)
|
||||
{
|
||||
std::cout << tag.first << ": ";
|
||||
|
||||
for (auto tag_entry: tag.second)
|
||||
{
|
||||
run_validate(config);
|
||||
std::cout << tag_entry->relative_path << ", ";
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
print_and_log_error("Command not found, exiting");
|
||||
return 1;
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Executes the "validate" command.
///
/// Reads the "path" option from the parsed configuration, announces it via
/// print_and_log, loads and parses every file below that path into a
/// FileContextCache, and then hands the resulting contexts to
/// kc::validate_links. The value returned by validate_links is not used here.
///
/// @param config Parsed program options; must contain a "path" entry of type
///               std::string.
void run_validate(std::shared_ptr<boost::program_options::variables_map> config)
{
    const std::string knowledge_base_root = (*config)["path"].as<std::string>();
    print_and_log("> Loading knowledge base from " + knowledge_base_root);

    kc::FileContextCache cache{};
    cache.load(knowledge_base_root);
    cache.parse_all();

    kc::validate_links(cache.get());
}
|
@ -2,8 +2,8 @@
|
||||
|
||||
namespace kc {
|
||||
|
||||
FileContext::FileContext(std::shared_ptr<kc::FileEntry> entry)
|
||||
: file_entry(entry)
|
||||
FileContext::FileContext(kc::FileEntry entry)
|
||||
: file_entry(std::make_shared<kc::FileEntry>(entry))
|
||||
{
|
||||
|
||||
}
|
||||
@ -33,7 +33,7 @@ void FileContext::parse()
|
||||
std::smatch image_match;
|
||||
while(std::regex_search(file_content, image_match, image_regex)) {
|
||||
|
||||
images.push_back(kc::Link(image_match.str()));
|
||||
images.push_back(image_match.str());
|
||||
file_content = image_match.suffix();
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ namespace kc {
|
||||
class FileContext {
|
||||
public:
|
||||
|
||||
FileContext(std::shared_ptr<kc::FileEntry> entry);
|
||||
FileContext(kc::FileEntry entry);
|
||||
|
||||
std::shared_ptr<kc::FileEntry> file_entry;
|
||||
std::vector<kc::Link> links;
|
||||
|
@ -1,25 +1,20 @@
|
||||
#include "FileContextCache.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <execution>
|
||||
|
||||
#include "../fs/fs.hpp"
|
||||
#include "../logging.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
void FileContextCache::load(const std::string root_path)
|
||||
void FileContextCache::load(std::string root_path)
|
||||
{
|
||||
BOOST_LOG_TRIVIAL(trace) << "Beginning cache load";
|
||||
|
||||
auto entries = kc::walk_dir(root_path);
|
||||
this->root_path.assign(root_path);
|
||||
|
||||
for (auto entry : entries)
|
||||
{
|
||||
if (entry->relative_path.extension() == ".md")
|
||||
if (entry.relative_path.extension() == ".md")
|
||||
{
|
||||
entry->load_content();
|
||||
entry.load_content();
|
||||
}
|
||||
|
||||
file_contexts.push_back(std::make_shared<kc::FileContext>(entry));
|
||||
@ -31,14 +26,8 @@ void FileContextCache::load(const std::string root_path)
|
||||
void FileContextCache::parse_all()
|
||||
{
|
||||
tag_map.clear();
|
||||
|
||||
#if __APPLE__
|
||||
std::for_each(file_contexts.begin(), file_contexts.end(), [this](std::shared_ptr<kc::FileContext> &context)
|
||||
#else
|
||||
std::for_each(std::execution::par_unseq, file_contexts.begin(), file_contexts.end(), [this](std::shared_ptr<kc::FileContext> &context)
|
||||
#endif
|
||||
|
||||
{
|
||||
for (auto context: file_contexts)
|
||||
{
|
||||
if (context->file_entry->relative_path.extension() == ".md")
|
||||
{
|
||||
context->parse();
|
||||
@ -51,7 +40,7 @@ void FileContextCache::parse_all()
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void FileContextCache::clear()
|
||||
@ -60,19 +49,14 @@ void FileContextCache::clear()
|
||||
file_contexts.shrink_to_fit();
|
||||
}
|
||||
|
||||
size_t FileContextCache::size() const
|
||||
size_t FileContextCache::size()
|
||||
{
|
||||
return file_contexts.size();
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get() const
|
||||
std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get()
|
||||
{
|
||||
return file_contexts;
|
||||
}
|
||||
|
||||
std::string FileContextCache::get_root_path() const
|
||||
{
|
||||
return root_path;
|
||||
}
|
||||
|
||||
}
|
@ -12,16 +12,14 @@ class FileContextCache {
|
||||
public:
|
||||
void load(std::string root_path);
|
||||
void clear();
|
||||
size_t size() const;
|
||||
std::vector<std::shared_ptr<kc::FileContext>> get() const;
|
||||
size_t size();
|
||||
std::vector<std::shared_ptr<kc::FileContext>> get();
|
||||
void parse_all();
|
||||
std::string get_root_path() const;
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::shared_ptr<kc::FileEntry>>> tag_map;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<kc::FileContext>> file_contexts;
|
||||
std::string root_path;
|
||||
};
|
||||
|
||||
}
|
@ -16,20 +16,12 @@ Link::Link(std::string original)
|
||||
|
||||
link = original_form.substr(opening_link + 1, closing_link - opening_link - 1);
|
||||
|
||||
external = link.starts_with("http");
|
||||
auto display_pos = original_form.find('#', opening_link);
|
||||
|
||||
auto sublink_pos = original_form.find('#', opening_link);
|
||||
|
||||
if(sublink_pos != std::string::npos)
|
||||
if(display_pos != std::string::npos)
|
||||
{
|
||||
sublink = original_form.substr(sublink_pos + 1, closing_link - sublink_pos - 1);
|
||||
link = original_form.substr(opening_link + 1, sublink_pos - opening_link - 1);
|
||||
display = original_form.substr(display_pos + 1, closing_link - display_pos - 1);
|
||||
}
|
||||
}
|
||||
|
||||
bool Link::is_external() const
|
||||
{
|
||||
return external;
|
||||
}
|
||||
|
||||
}
|
@ -12,13 +12,10 @@ class Link {
|
||||
std::string display;
|
||||
std::string link;
|
||||
std::string sublink;
|
||||
bool is_external() const;
|
||||
|
||||
Link(std::string original);
|
||||
|
||||
private:
|
||||
|
||||
bool external;
|
||||
};
|
||||
|
||||
}
|
@ -1,48 +0,0 @@
|
||||
#include "link.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
|
||||
#include "../logging.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace kc {
|
||||
|
||||
/// Checks every non-external link of every file context against the file
/// system.
///
/// Each link target is resolved relative to the directory containing the file
/// the link came from. A target that does not exist (or whose status cannot be
/// queried) is reported through print_and_log as "Invalid link: <file> ->
/// <original link text>". If no invalid link was found at all, "All links
/// valid" is reported instead.
///
/// External links (those for which Link::is_external() is true) are not
/// checked and do not appear in the result.
///
/// @param contexts Parsed file contexts to inspect; entries are dereferenced
///                 and therefore must not be null.
/// @return One FileLinkStateResult per context that has at least one checked
///         link, holding a VALID/INVALID LinkStateResult for each such link in
///         the order they appear in the context.
std::vector<kc::FileLinkStateResult> validate_links(const std::vector<std::shared_ptr<kc::FileContext>> &contexts)
{
    std::vector<kc::FileLinkStateResult> ret;

    std::size_t invalid_counter = 0;

    // Iterate by const reference: copying a shared_ptr costs an atomic
    // ref-count update and copying a Link copies several strings.
    for (const auto &context : contexts)
    {
        const fs::path &source_path = context->file_entry->file_entry.path();
        const fs::path base_dir = source_path.parent_path();

        std::vector<kc::LinkStateResult> link_states;

        for (const auto &link : context->links)
        {
            if (link.is_external())
            {
                continue;
            }

            // The error_code overload never throws; a status failure (e.g.
            // permission denied) yields false and is treated as invalid.
            std::error_code ec;
            const bool target_exists = fs::exists(base_dir / fs::path(link.link), ec);

            if (!target_exists)
            {
                // .string() is portable; an explicit std::string(path) cast
                // only compiles where path::string_type is std::string.
                print_and_log("Invalid link: " + source_path.string() + " -> " + link.original_form);
                ++invalid_counter;
            }

            link_states.push_back(kc::LinkStateResult{target_exists ? kc::VALID : kc::INVALID, link});
        }

        if (!link_states.empty())
        {
            ret.push_back(kc::FileLinkStateResult{context, link_states});
        }
    }

    if (invalid_counter == 0)
    {
        print_and_log("All links valid");
    }

    return ret;
}
|
||||
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "../parse/FileContext.hpp"
|
||||
|
||||
namespace kc {
|
||||
|
||||
// State recorded for a link in LinkStateResult: either VALID or INVALID.
// Unscoped enum, so the enumerators are reachable as kc::VALID / kc::INVALID.
enum LinkState {
|
||||
VALID, INVALID
|
||||
};
|
||||
|
||||
// Pairs a single link with the LinkState recorded for it.
struct LinkStateResult {
|
||||
// VALID or INVALID for `link`.
LinkState link_state;
|
||||
// The link this state refers to (stored by value).
kc::Link link;
|
||||
};
|
||||
|
||||
// Groups per-link states under the file context they belong to; this is the
// element type of the vector returned by validate_links.
struct FileLinkStateResult {
|
||||
// Shared handle to the file context the link states refer to.
std::shared_ptr<FileContext> file_context;
|
||||
// One LinkStateResult per recorded link of `file_context`.
std::vector<LinkStateResult> link_states;
|
||||
};
|
||||
|
||||
std::vector<FileLinkStateResult> validate_links(const std::vector<std::shared_ptr<kc::FileContext>> &contexts);
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user