From 47a6e1e522aa456847fdf0a90d0889d8365f2568 Mon Sep 17 00:00:00 2001 From: Andy Pack Date: Sat, 17 Jun 2023 10:18:57 +0100 Subject: [PATCH] validating links --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 1 + src/config.cpp | 7 ++++++ src/logging.cpp | 13 ++++++++++-- src/logging.hpp | 3 ++- src/main.cpp | 53 +++++++++++++++++++++------------------------- src/parse/Link.cpp | 14 +++++++++--- src/parse/Link.hpp | 3 +++ src/valid/link.cpp | 38 +++++++++++++++++++++++++++++++++ src/valid/link.hpp | 25 ++++++++++++++++++++++ 10 files changed, 123 insertions(+), 36 deletions(-) create mode 100644 src/valid/link.cpp create mode 100644 src/valid/link.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dcc4d6b..eaa652b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ project(kc VERSION 1.0 DESCRIPTION - "C++ scratchpad" + "Knowledge crawler for analysing notes" LANGUAGES CXX) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d514ff..92029a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,6 +5,7 @@ parse/Link.cpp parse/FileContext.cpp parse/FileContextCache.cpp + valid/link.cpp logging.cpp config.cpp ) diff --git a/src/config.cpp b/src/config.cpp index c2f1070..6c4bf9a 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -14,9 +14,14 @@ std::shared_ptr init_config(int argc, const char *argv[]) ("help", "produce help message") ("path,p", po::value()->default_value("."), "set root path of knowledge base") ("config", po::value()->default_value("kc.ini"), "config file location") + ("command", po::value(), "command to execute") + ("subargs", po::value >(), "Arguments for command") ("index", po::value()->default_value(1), "index") ; + po::positional_options_description pos; + pos.add("command", 1).add("subargs", -1); + po::options_description cmdline_options; cmdline_options.add(desc); @@ -30,6 +35,8 @@ std::shared_ptr init_config(int argc, const char *argv[]) auto vm = std::make_shared(); po::store(po::command_line_parser(argc, argv) .options(cmdline_options) + .positional(pos) + // .allow_unregistered() .run(), *vm); diff --git a/src/logging.cpp b/src/logging.cpp index 5f287a8..57365ee 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -1,5 +1,7 @@ #include "logging.hpp" +#include + namespace logging = boost::log; namespace src = boost::log::sources; namespace sinks = boost::log::sinks; @@ -13,10 +15,11 @@ void init_logging() ( keywords::file_name = "kc_%N.log", keywords::time_based_rotation = sinks::file::rotation_at_time_point(0, 0, 0), - keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%" + keywords::format = "[%TimeStamp%] [%ThreadID%] [%Severity%] %Message%", + keywords::open_mode = std::ios::app ); - logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%"); + // logging::add_console_log(std::cout, boost::log::keywords::format = "[%TimeStamp%] [%Severity%] >> %Message%"); logging::core::get()->set_filter ( @@ -24,4 +27,10 @@ void init_logging() ); logging::add_common_attributes(); +} + +inline void print_and_log(std::string log_line) +{ + BOOST_LOG_TRIVIAL(info) << log_line; + std::cout << log_line << std::endl; } \ No newline at end of file diff --git a/src/logging.hpp b/src/logging.hpp index 80231cc..9098ffd 100644 --- a/src/logging.hpp +++ b/src/logging.hpp @@ -7,4 +7,5 @@ #include #include -void init_logging(); \ No newline at end of file +void init_logging(); +void print_and_log(std::string log_line); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index ecd42fa..7261e3c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,6 +10,10 @@ #include "config.hpp" #include "fs/fs.hpp" #include "parse/FileContextCache.hpp" +#include "valid/link.hpp" + +void run_validate(std::shared_ptr config); + int main(int argc, const char *argv[]) { @@ -24,39 +28,30 @@ int main(int argc, const char *argv[]) { if(config) { - auto env_path = (*config)["path"].as(); - BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path; - - auto file_cache = kc::FileContextCache(); - file_cache.load(env_path); - file_cache.parse_all(); - - auto context = file_cache.get()[(*config)["index"].as()]; - - std::cout << context->file_entry->get_content() << std::endl << std::endl << std::endl; - - std::cout << "links: " << context->links.size() << std::endl; - std::cout << "images: " << context->images.size() << std::endl; - std::cout << "tags: " << context->tags.size() << std::endl << std::endl << std::endl;; - - for (auto link : context->links) + if (config->count("command") == 1) { - std::cout << link.original_form << " " << link.display << " --- " << link.link << std::endl; - } + auto command = (*config)["command"].as(); - std::cout << "tag cache: " << file_cache.tag_map.size() << std::endl; - - for (auto tag : file_cache.tag_map) - { - std::cout << tag.first << ": "; - - for (auto tag_entry: tag.second) + if (command == "validate") { - std::cout << tag_entry->relative_path << ", "; + run_validate(config); } - - std::cout << std::endl; } - + else + { + BOOST_LOG_TRIVIAL(info) << "command not found"; + } } +} + +void run_validate(std::shared_ptr config) +{ + auto env_path = (*config)["path"].as(); + BOOST_LOG_TRIVIAL(info) << "Loading knowledge base from " << env_path; + + auto file_cache = kc::FileContextCache(); + file_cache.load(env_path); + file_cache.parse_all(); + + kc::validate_links(file_cache.get()); } \ No newline at end of file diff --git a/src/parse/Link.cpp b/src/parse/Link.cpp index 8385edd..a7c4ac7 100644 --- a/src/parse/Link.cpp +++ b/src/parse/Link.cpp @@ -16,12 +16,20 @@ Link::Link(std::string original) link = original_form.substr(opening_link + 1, closing_link - opening_link - 1); - auto display_pos = original_form.find('#', opening_link); + external = link.starts_with("http"); - if(display_pos != std::string::npos) + auto sublink_pos = original_form.find('#', opening_link); + + if(sublink_pos != std::string::npos) { - display = original_form.substr(display_pos + 1, closing_link - display_pos - 1); + sublink = original_form.substr(sublink_pos + 1, closing_link - sublink_pos - 1); + link = original_form.substr(opening_link + 1, sublink_pos - opening_link - 1); } } +bool Link::is_external() const +{ + return external; +} + } \ No newline at end of file diff --git a/src/parse/Link.hpp b/src/parse/Link.hpp index 843d345..42d99d6 100644 --- a/src/parse/Link.hpp +++ b/src/parse/Link.hpp @@ -12,10 +12,13 @@ class Link { std::string display; std::string link; std::string sublink; + bool is_external() const; Link(std::string original); private: + + bool external; }; } \ No newline at end of file diff --git a/src/valid/link.cpp b/src/valid/link.cpp new file mode 100644 index 0000000..691ecd1 --- /dev/null +++ b/src/valid/link.cpp @@ -0,0 +1,38 @@ +#include "link.hpp" + +#include +#include + +namespace fs = std::filesystem; + +namespace kc { + +std::vector validate_links(const std::vector> &contexts) +{ + std::vector ret; + + for (auto context : contexts) + { + if (context->links.size() > 0) + { + for (auto link: context->links) + { + if(!link.is_external()) { + + auto composed = context->file_entry->file_entry.path().parent_path() / fs::path(link.link); + + auto entry = fs::directory_entry(composed); + + if(!entry.exists()) + { + std::cout << link.link << " + " << context->file_entry->file_entry.path() << " = " << composed << std::endl; + } + } + } + } + } + + return ret; +} + +} \ No newline at end of file diff --git a/src/valid/link.hpp b/src/valid/link.hpp new file mode 100644 index 0000000..ef6b11f --- /dev/null +++ b/src/valid/link.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include "../parse/FileContext.hpp" + +namespace kc { + +enum LinkState { + VALID, INVALID +}; + +struct LinkStateResult { + LinkState link_state; + kc::Link link; +}; + +struct FileLinkStateResult { + std::shared_ptr file_context; + std::vector link_states; +}; + +std::vector validate_links(const std::vector> &contexts); + +} \ No newline at end of file