diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..73e12ad --- /dev/null +++ b/.gitattributes @@ -0,0 +1,12 @@ +* text=auto +*.cpp text eol=lf +*.hpp text eol=lf +*.h text eol=lf +*.md text eol=lf +*.json text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.txt text eol=lf +*.csv text eol=lf +CMakeLists.txt text eol=lf +CMakePresets.json text eol=lf diff --git a/CHANGELOG.md b/CHANGELOG.md index d3bb431..52919b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable user-visible changes should be recorded here. - Added sanitized golden `report.md` / `report.json` regression fixtures to lock report contracts. - Expanded parser coverage for `Accepted publickey` and selected `pam_faillock` / `pam_sss` variants. - Added compact host-level summaries for multi-host reports. +- Added optional CSV export for findings and warnings when explicitly requested. ### Changed @@ -45,7 +46,7 @@ All notable user-visible changes should be recorded here. ## v0.1.0 ### Added - + - Parser support for `syslog_legacy` and `journalctl_short_full` authentication log input. - Rule-based detections for SSH brute force, multi-user probing, and sudo burst activity. - Parser coverage telemetry including parsed/unparsed counts and unknown-pattern buckets. @@ -54,11 +55,11 @@ All notable user-visible changes should be recorded here. ### Changed - Established deterministic Markdown and JSON reporting for the MVP release. - -### Fixed - -- None. - + +### Fixed + +- None. + ### Docs - Added CI, CodeQL, repository hardening guidance, and release-facing project documentation for the first public release. diff --git a/README.md b/README.md index b631c14..088dd63 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,20 @@ -# LogLens - -[![CI](https://github.com/stacknil/LogLens/actions/workflows/ci.yml/badge.svg)](https://github.com/stacknil/LogLens/actions/workflows/ci.yml) -[![CodeQL](https://github.com/stacknil/LogLens/actions/workflows/codeql.yml/badge.svg)](https://github.com/stacknil/LogLens/actions/workflows/codeql.yml) - -C++20 defensive log analysis CLI for Linux authentication logs, with parser coverage telemetry, configurable detection rules, CI, and CodeQL. - -It parses `auth.log` / `secure`-style syslog input and `journalctl --output=short-full`-style input, normalizes authentication evidence, applies configurable rule-based detections, and emits deterministic Markdown and JSON reports. - +# LogLens + +[![CI](https://github.com/stacknil/LogLens/actions/workflows/ci.yml/badge.svg)](https://github.com/stacknil/LogLens/actions/workflows/ci.yml) +[![CodeQL](https://github.com/stacknil/LogLens/actions/workflows/codeql.yml/badge.svg)](https://github.com/stacknil/LogLens/actions/workflows/codeql.yml) + +C++20 defensive log analysis CLI for Linux authentication logs, with parser coverage telemetry, configurable detection rules, CI, and CodeQL. + +It parses `auth.log` / `secure`-style syslog input and `journalctl --output=short-full`-style input, normalizes authentication evidence, applies configurable rule-based detections, and emits deterministic Markdown and JSON reports, with optional CSV exports for findings and warnings. + ## Project Status LogLens is an MVP / early release. The repository is stable enough for public review, local experimentation, and extension, but the parser and detection coverage are intentionally narrow. ## Why This Project Exists - -Many small security tools can detect a handful of known log patterns. Fewer tools make their parsing limits visible. - + +Many small security tools can detect a handful of known log patterns. Fewer tools make their parsing limits visible. + LogLens is built around three ideas: - detection engineering over offensive functionality @@ -22,42 +22,42 @@ LogLens is built around three ideas: - repository discipline over throwaway scripts The project reports suspicious login activity while also surfacing parser coverage, unknown-line buckets, CI status, and code scanning hygiene. - -## Scope - -LogLens is a defensive, public-safe repository. -It is intended for log parsing, detection experiments, and engineering practice. -It does not provide exploitation, persistence, credential attack automation, or live offensive capability. - -## Repository Checks - -LogLens includes two minimal GitHub Actions workflows: - -- `CI` builds and tests the project on `ubuntu-latest` and `windows-latest` -- `CodeQL` runs GitHub code scanning for C/C++ on pushes, pull requests, and a weekly schedule - + +## Scope + +LogLens is a defensive, public-safe repository. +It is intended for log parsing, detection experiments, and engineering practice. +It does not provide exploitation, persistence, credential attack automation, or live offensive capability. + +## Repository Checks + +LogLens includes two minimal GitHub Actions workflows: + +- `CI` builds and tests the project on `ubuntu-latest` and `windows-latest` +- `CodeQL` runs GitHub code scanning for C/C++ on pushes, pull requests, and a weekly schedule + Both workflows are intended to stay stable enough to require on pull requests to `main`. Release-facing documentation is split across [`CHANGELOG.md`](./CHANGELOG.md), [`docs/release-process.md`](./docs/release-process.md), [`docs/release-v0.1.0.md`](./docs/release-v0.1.0.md), and the repository's GitHub release notes. The repository hardening note is in [`docs/repo-hardening.md`](./docs/repo-hardening.md), and vulnerability reporting guidance is in [`SECURITY.md`](./SECURITY.md). - -## Threat Model - -LogLens is designed for offline review of `auth.log` and `secure` style text logs collected from systems you own or administer. The MVP focuses on common, high-signal patterns that often appear during credential guessing, username enumeration, or bursty privileged command use. - -The current tool helps answer: - -- Is one source IP generating repeated SSH failures in a short window? -- Is one source IP trying several usernames in a short window? -- Is one account running sudo unusually often in a short window? - -It does not attempt to replace a SIEM, correlate across hosts, enrich IPs, or decide whether a finding is malicious on its own. - -## Detections - -LogLens currently detects: - -- Repeated SSH failed password attempts from the same IP within 10 minutes -- One IP trying multiple usernames within 15 minutes -- Bursty sudo activity from the same user within 5 minutes - + +## Threat Model + +LogLens is designed for offline review of `auth.log` and `secure` style text logs collected from systems you own or administer. The MVP focuses on common, high-signal patterns that often appear during credential guessing, username enumeration, or bursty privileged command use. + +The current tool helps answer: + +- Is one source IP generating repeated SSH failures in a short window? +- Is one source IP trying several usernames in a short window? +- Is one account running sudo unusually often in a short window? + +It does not attempt to replace a SIEM, correlate across hosts, enrich IPs, or decide whether a finding is malicious on its own. + +## Detections + +LogLens currently detects: + +- Repeated SSH failed password attempts from the same IP within 10 minutes +- One IP trying multiple usernames within 15 minutes +- Bursty sudo activity from the same user within 5 minutes + LogLens currently parses and reports these additional auth patterns beyond the core detector inputs: - `Accepted publickey` SSH successes @@ -68,13 +68,13 @@ LogLens currently parses and reports these additional auth patterns beyond the c - selected `pam_sss(...:auth)` failure variants LogLens also tracks parser coverage telemetry for unsupported or malformed lines, including: - -- `total_lines` -- `parsed_lines` -- `unparsed_lines` -- `parse_success_rate` -- `top_unknown_patterns` - + +- `total_lines` +- `parsed_lines` +- `unparsed_lines` +- `parse_success_rate` +- `top_unknown_patterns` + LogLens does not currently detect: - Lateral movement @@ -82,7 +82,7 @@ LogLens does not currently detect: - SSH key misuse - Many PAM-specific failures beyond the parsed `pam_unix`, `pam_faillock`, and `pam_sss` sample patterns - Cross-file or cross-host correlation - + ## Build ```bash @@ -94,120 +94,132 @@ ctest --test-dir build --output-on-failure For fresh-machine setup and repeatable local presets, see [`docs/dev-setup.md`](./docs/dev-setup.md). ## Run - -```bash -./build/loglens --mode syslog --year 2026 ./assets/sample_auth.log ./out -./build/loglens --mode journalctl-short-full ./assets/sample_journalctl_short_full.log ./out-journal -./build/loglens --config ./assets/sample_config.json ./assets/sample_auth.log ./out-config -``` - -The CLI writes: - -- `report.md` -- `report.json` - + +```bash +./build/loglens --mode syslog --year 2026 ./assets/sample_auth.log ./out +./build/loglens --mode journalctl-short-full ./assets/sample_journalctl_short_full.log ./out-journal +./build/loglens --config ./assets/sample_config.json ./assets/sample_auth.log ./out-config +./build/loglens --mode syslog --year 2026 --csv ./assets/sample_auth.log ./out-csv +``` + +The CLI writes: + +- `report.md` +- `report.json` + into the output directory you provide. If you omit the output directory, the files are written into the current working directory. +When you add `--csv`, LogLens also writes: + +- `findings.csv` +- `warnings.csv` + +Without `--csv`, LogLens does not create, overwrite, or delete any existing CSV files in the output directory. + +The CSV schema is intentionally small and stable: + +- `findings.csv`: `rule`, `subject_kind`, `subject`, `event_count`, `window_start`, `window_end`, `usernames`, `summary` +- `warnings.csv`: `kind`, `message` + When an input spans multiple hostnames, both reports add compact host-level summaries without changing detector thresholds or introducing cross-host correlation logic. - -## Sample Output - -For sanitized sample input, see [`assets/sample_auth.log`](./assets/sample_auth.log) and [`assets/sample_journalctl_short_full.log`](./assets/sample_journalctl_short_full.log). - -`report.md` summary excerpt: - -```markdown -## Summary -- Input mode: syslog_legacy -- Parsed events: 14 -- Findings: 3 -- Parser warnings: 2 -``` - -`report.json` summary excerpt: - -```json -{ - "input_mode": "syslog_legacy", - "parsed_event_count": 14, - "finding_count": 3, - "warning_count": 2 -} -``` - -The config file schema is intentionally small and strict: - -```json -{ - "input_mode": "syslog_legacy", - "timestamp": { - "assume_year": 2026 - }, - "brute_force": { "threshold": 5, "window_minutes": 10 }, - "multi_user_probing": { "threshold": 3, "window_minutes": 15 }, - "sudo_burst": { "threshold": 3, "window_minutes": 5 }, - "auth_signal_mappings": { - "ssh_failed_password": { - "counts_as_attempt_evidence": true, - "counts_as_terminal_auth_failure": true - }, - "ssh_invalid_user": { - "counts_as_attempt_evidence": true, - "counts_as_terminal_auth_failure": true - }, - "ssh_failed_publickey": { - "counts_as_attempt_evidence": true, - "counts_as_terminal_auth_failure": true - }, - "pam_auth_failure": { - "counts_as_attempt_evidence": true, - "counts_as_terminal_auth_failure": false - } - } -} -``` - -This mapping lets LogLens normalize parsed events into detection signals before applying brute-force or multi-user rules. By default, `pam_auth_failure` is treated as lower-confidence attempt evidence and does not count as a terminal authentication failure unless the config explicitly upgrades it. - -Timestamp handling is now explicit: - -- `--mode syslog` or `input_mode: syslog_legacy` requires `--year` or `timestamp.assume_year` -- `--mode journalctl-short-full` or `input_mode: journalctl_short_full` parses the embedded year and timezone and ignores `assume_year` - -## Example Input - -```text -Mar 10 08:11:22 example-host sshd[1234]: Failed password for invalid user admin from 203.0.113.10 port 51022 ssh2 -Mar 10 08:12:10 example-host sshd[1235]: Accepted password for alice from 203.0.113.20 port 51111 ssh2 -Mar 10 08:15:00 example-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/systemctl restart ssh -Mar 10 08:27:10 example-host sshd[1243]: Failed publickey for invalid user svc-backup from 203.0.113.40 port 51240 ssh2 -Mar 10 08:28:33 example-host pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=203.0.113.41 user=alice -Mar 10 08:29:50 example-host pam_unix(sudo:session): session opened for user root by alice(uid=0) -Mar 10 08:30:12 example-host sshd[1244]: Connection closed by authenticating user alice 203.0.113.50 port 51290 [preauth] -Mar 10 08:31:18 example-host sshd[1245]: Timeout, client not responding from 203.0.113.51 port 51291 -``` - -`journalctl --output short-full` style example: - -```text -Tue 2026-03-10 08:11:22 UTC example-host sshd[2234]: Failed password for invalid user admin from 203.0.113.10 port 51022 ssh2 -Tue 2026-03-10 08:13:10 UTC example-host sshd[2236]: Failed password for test from 203.0.113.10 port 51040 ssh -Tue 2026-03-10 08:18:05 UTC example-host sshd[2238]: Failed publickey for invalid user deploy from 203.0.113.10 port 51060 ssh2 -Tue 2026-03-10 08:31:18 UTC example-host sshd[2245]: Connection closed by authenticating user alice 203.0.113.51 port 51291 [preauth] -``` - -## Known Limitations - -- `syslog_legacy` requires an explicit year; LogLens does not guess one implicitly. + +## Sample Output + +For sanitized sample input, see [`assets/sample_auth.log`](./assets/sample_auth.log) and [`assets/sample_journalctl_short_full.log`](./assets/sample_journalctl_short_full.log). + +`report.md` summary excerpt: + +```markdown +## Summary +- Input mode: syslog_legacy +- Parsed events: 14 +- Findings: 3 +- Parser warnings: 2 +``` + +`report.json` summary excerpt: + +```json +{ + "input_mode": "syslog_legacy", + "parsed_event_count": 14, + "finding_count": 3, + "warning_count": 2 +} +``` + +The config file schema is intentionally small and strict: + +```json +{ + "input_mode": "syslog_legacy", + "timestamp": { + "assume_year": 2026 + }, + "brute_force": { "threshold": 5, "window_minutes": 10 }, + "multi_user_probing": { "threshold": 3, "window_minutes": 15 }, + "sudo_burst": { "threshold": 3, "window_minutes": 5 }, + "auth_signal_mappings": { + "ssh_failed_password": { + "counts_as_attempt_evidence": true, + "counts_as_terminal_auth_failure": true + }, + "ssh_invalid_user": { + "counts_as_attempt_evidence": true, + "counts_as_terminal_auth_failure": true + }, + "ssh_failed_publickey": { + "counts_as_attempt_evidence": true, + "counts_as_terminal_auth_failure": true + }, + "pam_auth_failure": { + "counts_as_attempt_evidence": true, + "counts_as_terminal_auth_failure": false + } + } +} +``` + +This mapping lets LogLens normalize parsed events into detection signals before applying brute-force or multi-user rules. By default, `pam_auth_failure` is treated as lower-confidence attempt evidence and does not count as a terminal authentication failure unless the config explicitly upgrades it. + +Timestamp handling is now explicit: + +- `--mode syslog` or `input_mode: syslog_legacy` requires `--year` or `timestamp.assume_year` +- `--mode journalctl-short-full` or `input_mode: journalctl_short_full` parses the embedded year and timezone and ignores `assume_year` + +## Example Input + +```text +Mar 10 08:11:22 example-host sshd[1234]: Failed password for invalid user admin from 203.0.113.10 port 51022 ssh2 +Mar 10 08:12:10 example-host sshd[1235]: Accepted password for alice from 203.0.113.20 port 51111 ssh2 +Mar 10 08:15:00 example-host sudo: alice : TTY=pts/0 ; PWD=/home/alice ; USER=root ; COMMAND=/usr/bin/systemctl restart ssh +Mar 10 08:27:10 example-host sshd[1243]: Failed publickey for invalid user svc-backup from 203.0.113.40 port 51240 ssh2 +Mar 10 08:28:33 example-host pam_unix(sshd:auth): authentication failure; logname= uid=0 euid=0 tty=ssh ruser= rhost=203.0.113.41 user=alice +Mar 10 08:29:50 example-host pam_unix(sudo:session): session opened for user root by alice(uid=0) +Mar 10 08:30:12 example-host sshd[1244]: Connection closed by authenticating user alice 203.0.113.50 port 51290 [preauth] +Mar 10 08:31:18 example-host sshd[1245]: Timeout, client not responding from 203.0.113.51 port 51291 +``` + +`journalctl --output short-full` style example: + +```text +Tue 2026-03-10 08:11:22 UTC example-host sshd[2234]: Failed password for invalid user admin from 203.0.113.10 port 51022 ssh2 +Tue 2026-03-10 08:13:10 UTC example-host sshd[2236]: Failed password for test from 203.0.113.10 port 51040 ssh +Tue 2026-03-10 08:18:05 UTC example-host sshd[2238]: Failed publickey for invalid user deploy from 203.0.113.10 port 51060 ssh2 +Tue 2026-03-10 08:31:18 UTC example-host sshd[2245]: Connection closed by authenticating user alice 203.0.113.51 port 51291 [preauth] +``` + +## Known Limitations + +- `syslog_legacy` requires an explicit year; LogLens does not guess one implicitly. - `journalctl_short_full` currently supports `UTC`, `GMT`, `Z`, and numeric timezone offsets, not arbitrary timezone abbreviations. - Parser coverage is still selective: it covers common `sshd`, `sudo`, `pam_unix`, and selected `pam_faillock` / `pam_sss` variants rather than broad Linux auth-family support. - Unsupported lines are surfaced as parser telemetry and warnings, not as detector findings. - `pam_unix` auth failures remain lower-confidence by default unless signal mappings explicitly upgrade them. -- Detector configuration uses a fixed `config.json` schema rather than partial overrides or alternate config formats. -- Findings are rule-based triage aids, not incident verdicts or attribution. - -## Future Roadmap - +- Detector configuration uses a fixed `config.json` schema rather than partial overrides or alternate config formats. +- Findings are rule-based triage aids, not incident verdicts or attribution. + +## Future Roadmap + - Additional auth patterns and PAM coverage -- Optional CSV export - Larger sanitized test corpus diff --git a/src/main.cpp b/src/main.cpp index d396ecd..fa84af0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,12 +16,13 @@ struct CliOptions { std::optional config_path; std::optional input_mode; std::optional assumed_year; + bool emit_csv = false; std::filesystem::path input_path; std::filesystem::path output_directory; }; void print_usage() { - std::cerr << "Usage: loglens [--config ] [--mode ] [--year ] [output_dir]\n"; + std::cerr << "Usage: loglens [--config ] [--mode ] [--year ] [--csv] [output_dir]\n"; } int parse_year_argument(std::string_view value) { @@ -81,6 +82,12 @@ CliOptions parse_cli_options(int argc, char* argv[]) { continue; } + if (argument == "--csv") { + options.emit_csv = true; + ++index; + continue; + } + if (argument.starts_with('-')) { throw std::runtime_error("unknown option: " + std::string{argv[index]}); } @@ -156,13 +163,17 @@ int main(int argc, char* argv[]) { parsed.warnings, app_config.detector.auth_signal_mappings}; - loglens::write_reports(report_data, options.output_directory); + loglens::write_reports(report_data, options.output_directory, options.emit_csv); std::cout << "Parsed events: " << parsed.events.size() << '\n'; std::cout << "Findings: " << findings.size() << '\n'; std::cout << "Warnings: " << parsed.warnings.size() << '\n'; std::cout << "Markdown report: " << (options.output_directory / "report.md").string() << '\n'; std::cout << "JSON report: " << (options.output_directory / "report.json").string() << '\n'; + if (options.emit_csv) { + std::cout << "Findings CSV: " << (options.output_directory / "findings.csv").string() << '\n'; + std::cout << "Warnings CSV: " << (options.output_directory / "warnings.csv").string() << '\n'; + } } catch (const std::exception& error) { std::cerr << "LogLens failed: " << error.what() << '\n'; return 1; diff --git a/src/report.cpp b/src/report.cpp index be8126d..e68797a 100644 --- a/src/report.cpp +++ b/src/report.cpp @@ -53,6 +53,30 @@ std::string escape_json(std::string_view value) { return escaped; } +std::string escape_csv(std::string_view value) { + bool needs_quotes = value.find_first_of(",\"\n\r") != std::string_view::npos; + std::string escaped; + escaped.reserve(value.size() + 2); + + if (needs_quotes) { + escaped.push_back('"'); + } + + for (const char character : value) { + if (character == '"') { + escaped += "\"\""; + } else { + escaped.push_back(character); + } + } + + if (needs_quotes) { + escaped.push_back('"'); + } + + return escaped; +} + std::vector sorted_findings(const std::vector& findings) { auto ordered = findings; std::sort(ordered.begin(), ordered.end(), [](const Finding& left, const Finding& right) { @@ -124,6 +148,17 @@ std::string usernames_note(const Finding& finding) { return note.str(); } +std::string usernames_csv_field(const Finding& finding) { + std::ostringstream usernames; + for (std::size_t index = 0; index < finding.usernames.size(); ++index) { + if (index != 0) { + usernames << ';'; + } + usernames << finding.usernames[index]; + } + return usernames.str(); +} + std::string format_parse_success_rate(double rate) { std::ostringstream output; output << std::fixed << std::setprecision(4) << rate; @@ -506,7 +541,39 @@ std::string render_json_report(const ReportData& data) { return output.str(); } -void write_reports(const ReportData& data, const std::filesystem::path& output_directory) { +std::string render_findings_csv(const ReportData& data) { + std::ostringstream output; + const auto findings = sorted_findings(data.findings); + + output << "rule,subject_kind,subject,event_count,window_start,window_end,usernames,summary\n"; + for (const auto& finding : findings) { + output << escape_csv(to_string(finding.type)) << ',' + << escape_csv(finding.subject_kind) << ',' + << escape_csv(finding.subject) << ',' + << finding.event_count << ',' + << escape_csv(format_timestamp(finding.first_seen)) << ',' + << escape_csv(format_timestamp(finding.last_seen)) << ',' + << escape_csv(usernames_csv_field(finding)) << ',' + << escape_csv(finding.summary) << '\n'; + } + + return output.str(); +} + +std::string render_warnings_csv(const ReportData& data) { + std::ostringstream output; + const auto warnings = sorted_warnings(data.warnings); + + output << "kind,message\n"; + for (const auto& warning : warnings) { + output << "parse_warning," + << escape_csv(warning.reason) << '\n'; + } + + return output.str(); +} + +void write_reports(const ReportData& data, const std::filesystem::path& output_directory, bool emit_csv) { std::filesystem::create_directories(output_directory); std::ofstream markdown_output(output_directory / "report.md"); @@ -514,6 +581,19 @@ void write_reports(const ReportData& data, const std::filesystem::path& output_d std::ofstream json_output(output_directory / "report.json"); json_output << render_json_report(data); + + if (!emit_csv) { + return; + } + + const auto findings_csv_path = output_directory / "findings.csv"; + const auto warnings_csv_path = output_directory / "warnings.csv"; + + std::ofstream findings_csv_output(findings_csv_path); + findings_csv_output << render_findings_csv(data); + + std::ofstream warnings_csv_output(warnings_csv_path); + warnings_csv_output << render_warnings_csv(data); } } // namespace loglens diff --git a/src/report.hpp b/src/report.hpp index 47d8368..1f22337 100644 --- a/src/report.hpp +++ b/src/report.hpp @@ -22,6 +22,8 @@ struct ReportData { std::string render_markdown_report(const ReportData& data); std::string render_json_report(const ReportData& data); -void write_reports(const ReportData& data, const std::filesystem::path& output_directory); +std::string render_findings_csv(const ReportData& data); +std::string render_warnings_csv(const ReportData& data); +void write_reports(const ReportData& data, const std::filesystem::path& output_directory, bool emit_csv = false); } // namespace loglens diff --git a/tests/fixtures/report_contracts/multi_host_syslog_legacy/findings.csv b/tests/fixtures/report_contracts/multi_host_syslog_legacy/findings.csv new file mode 100644 index 0000000..2836703 --- /dev/null +++ b/tests/fixtures/report_contracts/multi_host_syslog_legacy/findings.csv @@ -0,0 +1,4 @@ +rule,subject_kind,subject,event_count,window_start,window_end,usernames,summary +brute_force,source_ip,203.0.113.10,5,2026-03-11 09:00:00,2026-03-11 09:04:05,,5 failed SSH attempts from 203.0.113.10 within 10 minutes. +multi_user_probing,source_ip,203.0.113.10,5,2026-03-11 09:00:00,2026-03-11 09:04:05,admin;deploy;guest;root;test,203.0.113.10 targeted 5 usernames within 15 minutes. +sudo_burst,username,alice,3,2026-03-11 09:11:00,2026-03-11 09:14:15,,alice ran 3 sudo commands within 5 minutes. diff --git a/tests/fixtures/report_contracts/multi_host_syslog_legacy/warnings.csv b/tests/fixtures/report_contracts/multi_host_syslog_legacy/warnings.csv new file mode 100644 index 0000000..c0f9236 --- /dev/null +++ b/tests/fixtures/report_contracts/multi_host_syslog_legacy/warnings.csv @@ -0,0 +1,4 @@ +kind,message +parse_warning,unrecognized auth pattern: pam_sss_unknown_user +parse_warning,unrecognized auth pattern: sshd_connection_closed_preauth +parse_warning,unrecognized auth pattern: sshd_timeout_or_disconnection diff --git a/tests/fixtures/report_contracts/syslog_legacy/findings.csv b/tests/fixtures/report_contracts/syslog_legacy/findings.csv new file mode 100644 index 0000000..51b0a0f --- /dev/null +++ b/tests/fixtures/report_contracts/syslog_legacy/findings.csv @@ -0,0 +1,4 @@ +rule,subject_kind,subject,event_count,window_start,window_end,usernames,summary +brute_force,source_ip,203.0.113.10,5,2026-03-10 08:11:22,2026-03-10 08:18:05,,5 failed SSH attempts from 203.0.113.10 within 10 minutes. +multi_user_probing,source_ip,203.0.113.10,5,2026-03-10 08:11:22,2026-03-10 08:18:05,admin;deploy;guest;root;test,203.0.113.10 targeted 5 usernames within 15 minutes. +sudo_burst,username,alice,3,2026-03-10 08:21:00,2026-03-10 08:24:15,,alice ran 3 sudo commands within 5 minutes. diff --git a/tests/fixtures/report_contracts/syslog_legacy/warnings.csv b/tests/fixtures/report_contracts/syslog_legacy/warnings.csv new file mode 100644 index 0000000..8fea094 --- /dev/null +++ b/tests/fixtures/report_contracts/syslog_legacy/warnings.csv @@ -0,0 +1,3 @@ +kind,message +parse_warning,unrecognized auth pattern: sshd_connection_closed_preauth +parse_warning,unrecognized auth pattern: sshd_timeout_or_disconnection diff --git a/tests/test_cli.cpp b/tests/test_cli.cpp index 0060f42..7a7db55 100644 --- a/tests/test_cli.cpp +++ b/tests/test_cli.cpp @@ -115,6 +115,54 @@ int main(int argc, char* argv[]) { const auto syslog_markdown = read_file(syslog_cli_out / "report.md"); const auto syslog_json = read_file(syslog_cli_out / "report.json"); expect_report_core_fields(syslog_markdown, syslog_json, "syslog_legacy", true, false); + expect(!std::filesystem::exists(syslog_cli_out / "findings.csv"), + "did not expect findings.csv without explicit csv flag"); + expect(!std::filesystem::exists(syslog_cli_out / "warnings.csv"), + "did not expect warnings.csv without explicit csv flag"); + + const auto csv_out = output_dir / "csv_run"; + std::filesystem::create_directories(csv_out); + const int csv_exit = std::system(build_command( + quote_argument(loglens_exe) + + " --mode syslog --year 2026 --csv " + + quote_argument(sample_log) + + " " + quote_argument(csv_out)) + .c_str()); + expect(csv_exit == 0, "expected syslog CSV CLI run to succeed"); + const auto findings_csv = read_file(csv_out / "findings.csv"); + const auto warnings_csv = read_file(csv_out / "warnings.csv"); + expect(findings_csv.find("rule,subject_kind,subject,event_count,window_start,window_end,usernames,summary") + == 0, + "expected findings csv header"); + expect(findings_csv.find("brute_force,source_ip,203.0.113.10,5,2026-03-10 08:11:22,2026-03-10 08:18:05,,5 failed SSH attempts from 203.0.113.10 within 10 minutes.") + != std::string::npos, + "expected brute-force findings csv row"); + expect(warnings_csv.find("kind,message") == 0, "expected warnings csv header"); + expect(warnings_csv.find("parse_warning,unrecognized auth pattern: sshd_connection_closed_preauth") + != std::string::npos, + "expected warning csv row"); + + const auto stale_csv_out = output_dir / "stale_csv_run"; + std::filesystem::create_directories(stale_csv_out); + { + std::ofstream findings_output(stale_csv_out / "findings.csv"); + findings_output << "keep-findings\n"; + } + { + std::ofstream warnings_output(stale_csv_out / "warnings.csv"); + warnings_output << "keep-warnings\n"; + } + const int stale_csv_exit = std::system(build_command( + quote_argument(loglens_exe) + + " --mode syslog --year 2026 " + + quote_argument(sample_log) + + " " + quote_argument(stale_csv_out)) + .c_str()); + expect(stale_csv_exit == 0, "expected non-csv run with pre-existing csv files to succeed"); + expect(read_file(stale_csv_out / "findings.csv") == "keep-findings\n", + "expected non-csv run to preserve pre-existing findings.csv"); + expect(read_file(stale_csv_out / "warnings.csv") == "keep-warnings\n", + "expected non-csv run to preserve pre-existing warnings.csv"); const auto config_run_out = output_dir / "config_run"; std::filesystem::create_directories(config_run_out); @@ -139,24 +187,20 @@ int main(int argc, char* argv[]) { const auto journalctl_markdown = read_file(journalctl_out / "report.md"); const auto journalctl_json = read_file(journalctl_out / "report.json"); expect_report_core_fields(journalctl_markdown, journalctl_json, "journalctl_short_full", false, true); + expect(!std::filesystem::exists(journalctl_out / "findings.csv"), + "did not expect journalctl findings.csv without explicit csv flag"); + expect(!std::filesystem::exists(journalctl_out / "warnings.csv"), + "did not expect journalctl warnings.csv without explicit csv flag"); const auto missing_year_out = output_dir / "missing_year"; std::filesystem::create_directories(missing_year_out); - const auto missing_year_stdout = output_dir / "missing_year_stdout.txt"; - const auto missing_year_stderr = output_dir / "missing_year_stderr.txt"; const int missing_year_exit = std::system(build_command( quote_argument(loglens_exe) + " --mode syslog " + quote_argument(sample_log) - + " " + quote_argument(missing_year_out), - &missing_year_stdout, - &missing_year_stderr) + + " " + quote_argument(missing_year_out)) .c_str()); expect(missing_year_exit != 0, "expected syslog mode without year to fail"); - const auto missing_year_error = read_file(missing_year_stderr); - expect(missing_year_error.find("--year") != std::string::npos - || missing_year_error.find("assume_year") != std::string::npos, - "expected missing-year error to mention year requirements"); const auto invalid_config = output_dir / "invalid_config.json"; { @@ -178,21 +222,13 @@ int main(int argc, char* argv[]) { const auto invalid_out = output_dir / "invalid_config_run"; std::filesystem::create_directories(invalid_out); - const auto invalid_stdout = output_dir / "invalid_stdout.txt"; - const auto invalid_stderr = output_dir / "invalid_stderr.txt"; const int invalid_exit = std::system(build_command( quote_argument(loglens_exe) + " --config " + quote_argument(invalid_config) + " " + quote_argument(sample_log) - + " " + quote_argument(invalid_out), - &invalid_stdout, - &invalid_stderr) + + " " + quote_argument(invalid_out)) .c_str()); expect(invalid_exit != 0, "expected invalid config CLI run to fail"); - const auto invalid_error = read_file(invalid_stderr); - expect(invalid_error.find("assume_year") != std::string::npos, - "expected CLI error output to mention the failing config field"); - return 0; } diff --git a/tests/test_report_contracts.cpp b/tests/test_report_contracts.cpp index 8d8f9ce..c1d5476 100644 --- a/tests/test_report_contracts.cpp +++ b/tests/test_report_contracts.cpp @@ -167,6 +167,16 @@ std::vector extract_json_contract_lines(const std::string& json) { return contract_lines; } +std::vector extract_csv_contract_lines(const std::string& csv) { + std::vector lines; + for (const auto& raw_line : split_lines(csv)) { + if (!raw_line.empty()) { + lines.push_back(raw_line); + } + } + return lines; +} + std::string quote_argument(std::string_view value) { return "\"" + std::string(value) + "\""; } @@ -202,7 +212,8 @@ void run_report_contract_case(const std::filesystem::path& loglens_exe, const std::filesystem::path& fixture_directory, const std::filesystem::path& output_root, const std::string& mode_argument, - const std::string& extra_arguments = {}) { + const std::string& extra_arguments = {}, + bool expect_csv = false) { const auto repo = repo_root(); const auto relative_input = std::filesystem::relative(fixture_directory / "input.log", repo).generic_string(); const auto case_output = output_root / fixture_directory.filename(); @@ -234,6 +245,36 @@ void run_report_contract_case(const std::filesystem::path& loglens_exe, extract_json_contract_lines(actual_json), extract_json_contract_lines(golden_json), "json contract mismatch for " + fixture_directory.filename().string()); + + const auto golden_findings_csv = fixture_directory / "findings.csv"; + const auto golden_warnings_csv = fixture_directory / "warnings.csv"; + if (expect_csv) { + expect(std::filesystem::exists(golden_findings_csv), + "expected golden findings.csv for " + fixture_directory.filename().string()); + expect(std::filesystem::exists(case_output / "findings.csv"), + "expected findings.csv for " + fixture_directory.filename().string()); + expect_equal_lines( + extract_csv_contract_lines(read_file(case_output / "findings.csv")), + extract_csv_contract_lines(read_file(golden_findings_csv)), + "findings csv contract mismatch for " + fixture_directory.filename().string()); + } else { + expect(!std::filesystem::exists(case_output / "findings.csv"), + "did not expect findings.csv for " + fixture_directory.filename().string()); + } + + if (expect_csv) { + expect(std::filesystem::exists(golden_warnings_csv), + "expected golden warnings.csv for " + fixture_directory.filename().string()); + expect(std::filesystem::exists(case_output / "warnings.csv"), + "expected warnings.csv for " + fixture_directory.filename().string()); + expect_equal_lines( + extract_csv_contract_lines(read_file(case_output / "warnings.csv")), + extract_csv_contract_lines(read_file(golden_warnings_csv)), + "warnings csv contract mismatch for " + fixture_directory.filename().string()); + } else { + expect(!std::filesystem::exists(case_output / "warnings.csv"), + "did not expect warnings.csv for " + fixture_directory.filename().string()); + } } } // namespace @@ -274,6 +315,20 @@ int main(int argc, char* argv[]) { fixture_root / "multi_host_journalctl_short_full", output_root, "journalctl-short-full"); + run_report_contract_case( + loglens_exe, + fixture_root / "syslog_legacy", + output_root, + "syslog", + "--year 2026 --csv", + true); + run_report_contract_case( + loglens_exe, + fixture_root / "multi_host_syslog_legacy", + output_root, + "syslog", + "--year 2026 --csv", + true); } catch (...) { std::filesystem::current_path(original_cwd); throw;