Compare commits

...

5 Commits

Author SHA1 Message Date
13aee5d6f5
link validation working 2023-06-17 10:58:10 +01:00
47a6e1e522
validating links 2023-06-17 10:18:57 +01:00
778af44768
more pointer fiddling 2023-06-17 10:18:38 +01:00
560b613ca5
tweaking pointers 2023-06-17 10:18:18 +01:00
85941393bf
adding const to function 2023-06-17 10:17:20 +01:00
18 changed files with 187 additions and 59 deletions

View File

@ -13,7 +13,7 @@ project(kc
VERSION VERSION
1.0 1.0
DESCRIPTION DESCRIPTION
"C++ scratchpad" "Knowledge crawler for analysing notes"
LANGUAGES LANGUAGES
CXX) CXX)

View File

@ -5,6 +5,7 @@
parse/Link.cpp parse/Link.cpp
parse/FileContext.cpp parse/FileContext.cpp
parse/FileContextCache.cpp parse/FileContextCache.cpp
valid/link.cpp
logging.cpp logging.cpp
config.cpp config.cpp
) )

View File

@ -14,9 +14,14 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
("help", "produce help message") ("help", "produce help message")
("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base") ("path,p", po::value<std::string>()->default_value("."), "set root path of knowledge base")
("config", po::value<std::string>()->default_value("kc.ini"), "config file location") ("config", po::value<std::string>()->default_value("kc.ini"), "config file location")
("command", po::value<std::string>(), "command to execute")
("subargs", po::value<std::vector<std::string> >(), "Arguments for command")
("index", po::value<int>()->default_value(1), "index") ("index", po::value<int>()->default_value(1), "index")
; ;
po::positional_options_description pos;
pos.add("command", 1).add("subargs", -1);
po::options_description cmdline_options; po::options_description cmdline_options;
cmdline_options.add(desc); cmdline_options.add(desc);
@ -30,6 +35,8 @@ std::shared_ptr<po::variables_map> init_config(int argc, const char *argv[])
auto vm = std::make_shared<po::variables_map>(); auto vm = std::make_shared<po::variables_map>();
po::store(po::command_line_parser(argc, argv) po::store(po::command_line_parser(argc, argv)
.options(cmdline_options) .options(cmdline_options)
.positional(pos)
// .allow_unregistered()
.run(), .run(),
*vm); *vm);

View File

@ -11,7 +11,7 @@ FileEntry::FileEntry(fs::directory_entry entry)
} }
bool FileEntry::content_loaded() bool FileEntry::content_loaded() const
{ {
return loaded; return loaded;
} }
@ -25,7 +25,7 @@ std::string FileEntry::load_content()
return file_content; return file_content;
} }
std::string FileEntry::get_content() std::string FileEntry::get_content() const
{ {
return file_content; return file_content;
} }

View File

@ -17,9 +17,9 @@ class FileEntry {
fs::directory_entry file_entry; fs::directory_entry file_entry;
fs::path relative_path; fs::path relative_path;
bool content_loaded(); bool content_loaded() const;
std::string load_content(); std::string load_content();
std::string get_content(); std::string get_content() const;
void clear_content(); void clear_content();
private: private:

View File

@ -5,9 +5,9 @@
static const std::string exclusions[] = {".git", ".obsidian"}; static const std::string exclusions[] = {".git", ".obsidian"};
std::vector<kc::FileEntry> kc::walk_dir(std::string dir) std::vector<std::shared_ptr<kc::FileEntry>> kc::walk_dir(const std::string dir)
{ {
auto matched = std::vector<kc::FileEntry>(); auto matched = std::vector<std::shared_ptr<kc::FileEntry>>();
auto base_path = fs::path(dir); auto base_path = fs::path(dir);
for (auto const& dir_entry : fs::recursive_directory_iterator(base_path)) for (auto const& dir_entry : fs::recursive_directory_iterator(base_path))
@ -23,14 +23,15 @@ std::vector<kc::FileEntry> kc::walk_dir(std::string dir)
if (dir_entry_path_string.contains(exclusion)) if (dir_entry_path_string.contains(exclusion))
{ {
excluded = true; excluded = true;
break;
} }
} }
if (!excluded) if (!excluded)
{ {
auto entry = kc::FileEntry(dir_entry); auto entry = std::make_shared<kc::FileEntry>(dir_entry);
entry.relative_path = fs::relative(dir_entry_path, base_path); entry->relative_path = fs::relative(dir_entry_path, base_path);
matched.push_back(entry); matched.push_back(entry);
} }

View File

@ -3,6 +3,7 @@
#include <string> #include <string>
#include <filesystem> #include <filesystem>
#include <vector> #include <vector>
#include <memory>
#include "FileEntry.hpp" #include "FileEntry.hpp"
@ -10,6 +11,6 @@ namespace fs = std::filesystem;
namespace kc { namespace kc {
std::vector<kc::FileEntry> walk_dir(std::string dir); std::vector<std::shared_ptr<kc::FileEntry>> walk_dir(std::string dir);
} }

View File

@ -1,5 +1,6 @@
#include "logging.hpp" #include "logging.hpp"
namespace logging = boost::log; namespace logging = boost::log;
namespace src = boost::log::sources; namespace src = boost::log::sources;
namespace sinks = boost::log::sinks; namespace sinks = boost::log::sinks;
@ -13,10 +14,11 @@ void init_logging()
( (
keywords::file_name = "kc_%N.log", keywords::file_name = "kc_%N.log",
keywords::time_based_rotation = sinks::file::rotation_at_time_point(0, 0, 0), keywords::time_based_rotation = sinks::file::rotation_at_time_point(0, 0, 0),
keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%" keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%",
keywords::open_mode = std::ios::app
); );
logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%"); // logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%");
logging::core::get()->set_filter logging::core::get()->set_filter
( (

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <iostream>
#include <boost/log/core.hpp> #include <boost/log/core.hpp>
#include <boost/log/trivial.hpp> #include <boost/log/trivial.hpp>
#include <boost/log/expressions.hpp> #include <boost/log/expressions.hpp>
@ -8,3 +10,15 @@
#include <boost/log/utility/setup/console.hpp> #include <boost/log/utility/setup/console.hpp>
void init_logging(); void init_logging();
/// Records a message in the log at `info` severity and echoes it on stdout.
///
/// @param log_line Text to record. Taken by const reference so callers that
///                 already hold a std::string do not pay for a copy.
inline void print_and_log(const std::string &log_line)
{
    BOOST_LOG_TRIVIAL(info) << log_line;
    // std::endl flushes, so the line shows up on the console immediately.
    std::cout << log_line << std::endl;
}
/// Records a message in the log at `error` severity and echoes it on stdout
/// with an "ERROR: " prefix.
///
/// @param log_line Text to record. Taken by const reference so callers that
///                 already hold a std::string do not pay for a copy.
inline void print_and_log_error(const std::string &log_line)
{
    BOOST_LOG_TRIVIAL(error) << log_line;
    // NOTE(review): the console copy goes to stdout, not stderr; kept as-is so
    // existing output redirection keeps working. Consider std::cerr.
    std::cout << "ERROR: " << log_line << std::endl;
}

View File

@ -10,6 +10,10 @@
#include "config.hpp" #include "config.hpp"
#include "fs/fs.hpp" #include "fs/fs.hpp"
#include "parse/FileContextCache.hpp" #include "parse/FileContextCache.hpp"
#include "valid/link.hpp"
void run_validate(std::shared_ptr<boost::program_options::variables_map> config);
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
@ -18,45 +22,41 @@ int main(int argc, const char *argv[]) {
BOOST_LOG_TRIVIAL(info) << "================================"; BOOST_LOG_TRIVIAL(info) << "================================";
BOOST_LOG_TRIVIAL(info) << " kc"; BOOST_LOG_TRIVIAL(info) << " kc";
BOOST_LOG_TRIVIAL(info) << "================================"; BOOST_LOG_TRIVIAL(info) << "================================";
BOOST_LOG_TRIVIAL(info) << "starting up...."; BOOST_LOG_TRIVIAL(info) << "Starting up....";
auto config = init_config(argc, argv); auto config = init_config(argc, argv);
if(config) if(config)
{ {
auto env_path = (*config)["path"].as<std::string>(); if (config->count("command") == 1)
BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path;
auto file_cache = kc::FileContextCache();
file_cache.load(env_path);
file_cache.parse_all();
auto context = file_cache.get()[(*config)["index"].as<int>()];
std::cout << context->file_entry->get_content() << std::endl << std::endl << std::endl;
std::cout << "links: " << context->links.size() << std::endl;
std::cout << "images: " << context->images.size() << std::endl;
std::cout << "tags: " << context->tags.size() << std::endl << std::endl << std::endl;;
for (auto link : context->links)
{ {
std::cout << link.original_form << " " << link.display << " --- " << link.link << std::endl; auto command = (*config)["command"].as<std::string>();
}
std::cout << "tag cache: " << file_cache.tag_map.size() << std::endl; if (command == "validate")
for (auto tag : file_cache.tag_map)
{
std::cout << tag.first << ": ";
for (auto tag_entry: tag.second)
{ {
std::cout << tag_entry->relative_path << ", "; run_validate(config);
} }
}
std::cout << std::endl; else
{
print_and_log_error("Command not found, exiting");
return 1;
} }
return 0;
} }
return 1;
}
// Handler for the "validate" command: loads the knowledge base found under
// the configured "path" option, parses the loaded files and checks their links.
void run_validate(std::shared_ptr<boost::program_options::variables_map> config)
{
auto env_path = (*config)["path"].as<std::string>();
print_and_log("> Loading knowledge base from " + env_path);
auto file_cache = kc::FileContextCache();
file_cache.load(env_path);
file_cache.parse_all();
// The returned result vector is not used here; validate_links reports
// invalid links through print_and_log itself.
kc::validate_links(file_cache.get());
} }

View File

@ -2,8 +2,8 @@
namespace kc { namespace kc {
FileContext::FileContext(kc::FileEntry entry) FileContext::FileContext(std::shared_ptr<kc::FileEntry> entry)
: file_entry(std::make_shared<kc::FileEntry>(entry)) : file_entry(entry)
{ {
} }
@ -33,7 +33,7 @@ void FileContext::parse()
std::smatch image_match; std::smatch image_match;
while(std::regex_search(file_content, image_match, image_regex)) { while(std::regex_search(file_content, image_match, image_regex)) {
images.push_back(image_match.str()); images.push_back(kc::Link(image_match.str()));
file_content = image_match.suffix(); file_content = image_match.suffix();
} }

View File

@ -13,7 +13,7 @@ namespace kc {
class FileContext { class FileContext {
public: public:
FileContext(kc::FileEntry entry); FileContext(std::shared_ptr<kc::FileEntry> entry);
std::shared_ptr<kc::FileEntry> file_entry; std::shared_ptr<kc::FileEntry> file_entry;
std::vector<kc::Link> links; std::vector<kc::Link> links;

View File

@ -1,20 +1,25 @@
#include "FileContextCache.hpp" #include "FileContextCache.hpp"
#include <algorithm>
#include <execution>
#include "../fs/fs.hpp" #include "../fs/fs.hpp"
#include "../logging.hpp" #include "../logging.hpp"
namespace kc { namespace kc {
void FileContextCache::load(std::string root_path) void FileContextCache::load(const std::string root_path)
{ {
BOOST_LOG_TRIVIAL(trace) << "Beginning cache load"; BOOST_LOG_TRIVIAL(trace) << "Beginning cache load";
auto entries = kc::walk_dir(root_path); auto entries = kc::walk_dir(root_path);
this->root_path.assign(root_path);
for (auto entry : entries) for (auto entry : entries)
{ {
if (entry.relative_path.extension() == ".md") if (entry->relative_path.extension() == ".md")
{ {
entry.load_content(); entry->load_content();
} }
file_contexts.push_back(std::make_shared<kc::FileContext>(entry)); file_contexts.push_back(std::make_shared<kc::FileContext>(entry));
@ -26,7 +31,13 @@ void FileContextCache::load(std::string root_path)
void FileContextCache::parse_all() void FileContextCache::parse_all()
{ {
tag_map.clear(); tag_map.clear();
for (auto context: file_contexts)
#if __APPLE__
std::for_each(file_contexts.begin(), file_contexts.end(), [this](std::shared_ptr<kc::FileContext> &context)
#else
std::for_each(std::execution::par_unseq, file_contexts.begin(), file_contexts.end(), [this](std::shared_ptr<kc::FileContext> &context)
#endif
{ {
if (context->file_entry->relative_path.extension() == ".md") if (context->file_entry->relative_path.extension() == ".md")
{ {
@ -40,7 +51,7 @@ void FileContextCache::parse_all()
} }
} }
} }
} });
} }
void FileContextCache::clear() void FileContextCache::clear()
@ -49,14 +60,19 @@ void FileContextCache::clear()
file_contexts.shrink_to_fit(); file_contexts.shrink_to_fit();
} }
size_t FileContextCache::size() size_t FileContextCache::size() const
{ {
return file_contexts.size(); return file_contexts.size();
} }
std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get() std::vector<std::shared_ptr<kc::FileContext>> FileContextCache::get() const
{ {
return file_contexts; return file_contexts;
} }
std::string FileContextCache::get_root_path() const
{
return root_path;
}
} }

View File

@ -12,14 +12,16 @@ class FileContextCache {
public: public:
void load(std::string root_path); void load(std::string root_path);
void clear(); void clear();
size_t size(); size_t size() const;
std::vector<std::shared_ptr<kc::FileContext>> get(); std::vector<std::shared_ptr<kc::FileContext>> get() const;
void parse_all(); void parse_all();
std::string get_root_path() const;
std::unordered_map<std::string, std::vector<std::shared_ptr<kc::FileEntry>>> tag_map; std::unordered_map<std::string, std::vector<std::shared_ptr<kc::FileEntry>>> tag_map;
private: private:
std::vector<std::shared_ptr<kc::FileContext>> file_contexts; std::vector<std::shared_ptr<kc::FileContext>> file_contexts;
std::string root_path;
}; };
} }

View File

@ -16,12 +16,20 @@ Link::Link(std::string original)
link = original_form.substr(opening_link + 1, closing_link - opening_link - 1); link = original_form.substr(opening_link + 1, closing_link - opening_link - 1);
auto display_pos = original_form.find('#', opening_link); external = link.starts_with("http");
if(display_pos != std::string::npos) auto sublink_pos = original_form.find('#', opening_link);
if(sublink_pos != std::string::npos)
{ {
display = original_form.substr(display_pos + 1, closing_link - display_pos - 1); sublink = original_form.substr(sublink_pos + 1, closing_link - sublink_pos - 1);
link = original_form.substr(opening_link + 1, sublink_pos - opening_link - 1);
} }
} }
bool Link::is_external() const
{
return external;
}
} }

View File

@ -12,10 +12,13 @@ class Link {
std::string display; std::string display;
std::string link; std::string link;
std::string sublink; std::string sublink;
bool is_external() const;
Link(std::string original); Link(std::string original);
private: private:
bool external;
}; };
} }

48
src/valid/link.cpp Normal file
View File

@ -0,0 +1,48 @@
#include "link.hpp"
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>
#include <utility>
#include "../logging.hpp"
namespace fs = std::filesystem;
namespace kc {
/// Checks every internal (non-external) link of every file context against
/// the filesystem.
///
/// Each internal link is resolved relative to the directory of the file that
/// contains it. A link whose target does not exist is reported through
/// print_and_log; when no invalid link is found, "All links valid" is reported.
///
/// @param contexts Parsed file contexts whose `links` are to be checked.
/// @return One FileLinkStateResult per context that has at least one internal
///         link, holding a VALID/INVALID state for each of those links.
///         External links are not checked and do not appear in the result.
std::vector<kc::FileLinkStateResult> validate_links(const std::vector<std::shared_ptr<kc::FileContext>> &contexts)
{
    std::vector<kc::FileLinkStateResult> ret;
    int invalid_counter = 0;

    // Iterate by reference: copying a shared_ptr costs an atomic ref-count
    // update and copying a Link copies several strings.
    for (const auto &context : contexts)
    {
        const fs::path &source_path = context->file_entry->file_entry.path();
        const fs::path base_dir = source_path.parent_path();

        kc::FileLinkStateResult file_result{context, {}};
        for (const auto &link : context->links)
        {
            if (link.is_external())
            {
                continue;
            }

            // The non-throwing overload is used because link text comes from
            // user notes; a target that cannot be queried counts as missing
            // instead of aborting the whole run with filesystem_error.
            std::error_code ec;
            const bool target_exists = fs::exists(base_dir / fs::path(link.link), ec);
            if (!target_exists)
            {
                print_and_log("Invalid link: " + source_path.string() + " -> " + link.original_form);
                ++invalid_counter;
            }

            file_result.link_states.push_back(
                kc::LinkStateResult{target_exists ? LinkState::VALID : LinkState::INVALID, link});
        }

        if (!file_result.link_states.empty())
        {
            ret.push_back(std::move(file_result));
        }
    }

    if (invalid_counter == 0)
    {
        print_and_log("All links valid");
    }
    return ret;
}
}

25
src/valid/link.hpp Normal file
View File

@ -0,0 +1,25 @@
#pragma once
#include <memory>
#include <vector>
#include "../parse/FileContext.hpp"
namespace kc {
// Outcome recorded for a single link in a LinkStateResult.
// NOTE(review): presumably VALID means the link target was found and INVALID
// that it was not — confirm against validate_links.
enum LinkState {
VALID, INVALID
};
// Pairs one link with the state recorded for it.
struct LinkStateResult {
// State recorded for `link`.
LinkState link_state;
// The link this result refers to (stored by value).
kc::Link link;
};
// Groups link states with the file context they belong to.
struct FileLinkStateResult {
// Shared handle to the file context the link states refer to.
std::shared_ptr<FileContext> file_context;
// One entry per link recorded for this file.
std::vector<LinkStateResult> link_states;
};
// Checks the non-external links of the given file contexts against the
// filesystem; invalid links are reported via print_and_log.
// Returns a vector of per-file results.
// NOTE(review): confirm in link.cpp which entries of the result are populated.
std::vector<FileLinkStateResult> validate_links(const std::vector<std::shared_ptr<kc::FileContext>> &contexts);
}