mirror of
https://github.com/rajnandan1/kener.git
synced 2026-06-23 04:10:22 +00:00
docs updates
This commit is contained in:
@@ -1,386 +0,0 @@
|
||||
---
|
||||
name: documentation-writer
|
||||
description: Specialized skill for creating and editing high-quality Kener documentation. MUST be used whenever creating or editing documentation files in the src/routes/(docs)/docs/content/ directory or updating docs.json navigation.
|
||||
---
|
||||
|
||||
# Documentation Writer
|
||||
|
||||
## Overview
|
||||
|
||||
This skill provides guidelines and best practices for creating high-quality, easy-to-follow documentation for Kener. Use this skill when creating new documentation or editing existing documentation files.
|
||||
|
||||
## Documentation Structure
|
||||
|
||||
### Content Location
|
||||
|
||||
All documentation files are located in:
|
||||
```
|
||||
src/routes/(docs)/docs/content/
|
||||
```
|
||||
|
||||
### File Organization
|
||||
|
||||
```
|
||||
content/
|
||||
├── introduction.md # Top-level pages
|
||||
├── configuration.md
|
||||
├── monitors/ # Nested sections
|
||||
│ ├── overview.md
|
||||
│ ├── api.md
|
||||
│ └── ping.md
|
||||
├── alerting/
|
||||
│ ├── overview.md
|
||||
│ └── triggers.md
|
||||
└── setup/
|
||||
├── email-setup.md
|
||||
└── database-setup.md
|
||||
```
|
||||
|
||||
### Navigation Configuration
|
||||
|
||||
Navigation is controlled by `/src/routes/(docs)/docs.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Getting Started",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Introduction",
|
||||
"slug": "introduction"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Important**: When creating new documentation files, you MUST also update `docs.json` to add navigation entries.
|
||||
|
||||
## Required Frontmatter
|
||||
|
||||
Every documentation file MUST begin with YAML frontmatter:
|
||||
|
||||
```markdown
|
||||
---
|
||||
title: Your Page Title
|
||||
description: A clear, concise description of the page content (used for SEO and previews)
|
||||
---
|
||||
```
|
||||
|
||||
## Custom Heading Anchors
|
||||
|
||||
Use custom heading IDs for stable deep linking. This is CRITICAL for maintaining links even if heading text changes.
|
||||
|
||||
### Syntax
|
||||
|
||||
```markdown
|
||||
## Section Title {#section-title}
|
||||
```
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Always include custom IDs** for all H2 and H3 headings
|
||||
2. Use lowercase, kebab-case format
|
||||
3. Keep IDs short but descriptive
|
||||
4. IDs should be unique within the document
|
||||
|
||||
### Examples
|
||||
|
||||
```markdown
|
||||
## How API Monitoring Works {#how-api-monitoring-works}
|
||||
### Configuration Options {#configuration-options}
|
||||
### Common Issues {#common-issues}
|
||||
```
|
||||
|
||||
## Markdown Features
|
||||
|
||||
### Callout Boxes
|
||||
|
||||
Use GitHub-flavored callout syntax for important information:
|
||||
|
||||
```markdown
|
||||
> [!NOTE]
|
||||
> Additional context or helpful information
|
||||
|
||||
> [!WARNING]
|
||||
> Caution about potential issues
|
||||
|
||||
> [!TIP]
|
||||
> Helpful tips for users
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Critical information users should know
|
||||
```
|
||||
|
||||
### Code Blocks
|
||||
|
||||
Always specify the language for syntax highlighting:
|
||||
|
||||
````markdown
|
||||
```javascript
|
||||
// Your code here
|
||||
```
|
||||
|
||||
```bash
|
||||
# Shell commands
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"config": "value"
|
||||
}
|
||||
```
|
||||
````
|
||||
|
||||
### Tables
|
||||
|
||||
Use tables for structured information:
|
||||
|
||||
```markdown
|
||||
| Column 1 | Column 2 | Column 3 |
|
||||
| :------- | :------- | :------- |
|
||||
| Value 1 | Value 2 | Value 3 |
|
||||
```
|
||||
|
||||
### Internal Links
|
||||
|
||||
Reference other documentation pages using absolute paths:
|
||||
|
||||
```markdown
|
||||
See the [Email Setup](/docs/setup/email-setup) guide for details.
|
||||
```
|
||||
|
||||
For section anchors:
|
||||
|
||||
```markdown
|
||||
See [Configuration Options](/docs/monitors/api#configuration-options).
|
||||
```
|
||||
|
||||
## Documentation Structure Template
|
||||
|
||||
Follow this structure for comprehensive documentation pages:
|
||||
|
||||
```markdown
|
||||
---
|
||||
title: Feature Name
|
||||
description: Brief description of the feature
|
||||
---
|
||||
|
||||
Brief introduction paragraph explaining what the feature is and why it matters.
|
||||
|
||||
## How It Works {#how-it-works}
|
||||
|
||||
Explain the basic workflow or concept.
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
|
||||
Document all available configuration options using tables:
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :------- | :------- | :---------- | :------ |
|
||||
| `option` | `string` | What it does | `value` |
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
Provide practical examples with clear explanations.
|
||||
|
||||
### 1. Basic Example {#basic-example}
|
||||
|
||||
Simple use case with minimal configuration.
|
||||
|
||||
### 2. Advanced Example {#advanced-example}
|
||||
|
||||
More complex use case demonstrating advanced features.
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Topic 1 {#best-practices-topic-1}
|
||||
|
||||
Practical advice for optimal usage.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :---- | :------------- | :------- |
|
||||
| Problem | Why it happens | How to fix |
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
- [Related Documentation 1](/docs/path/to/doc1)
|
||||
- [Related Documentation 2](/docs/path/to/doc2)
|
||||
```
|
||||
|
||||
## Quality Guidelines
|
||||
|
||||
### Writing Style
|
||||
|
||||
1. **Be Clear and Concise**: Use simple language and short sentences
|
||||
2. **Be Action-Oriented**: Focus on what users need to do
|
||||
3. **Be Consistent**: Use the same terms throughout documentation
|
||||
4. **Be Complete**: Don't assume prior knowledge
|
||||
|
||||
### Content Requirements
|
||||
|
||||
1. **Start with Context**: Explain why the feature exists
|
||||
2. **Show Examples**: Always include practical examples
|
||||
3. **Include Edge Cases**: Document common pitfalls
|
||||
4. **Link Related Topics**: Help users navigate to related information
|
||||
|
||||
### Technical Writing
|
||||
|
||||
1. **Use Active Voice**: "Configure the monitor" not "The monitor should be configured"
|
||||
2. **Use Present Tense**: "Kener sends notifications" not "Kener will send notifications"
|
||||
3. **Be Specific**: Include exact values, paths, and commands
|
||||
4. **Test All Code**: Ensure all code examples work
|
||||
|
||||
## Avoid Duplication
|
||||
|
||||
### Before Creating New Documentation
|
||||
|
||||
1. **Search Existing Docs**: Check if the topic is already covered
|
||||
2. **Use References**: If content exists elsewhere, link to it instead of duplicating
|
||||
3. **Extend Existing Docs**: Consider adding to an existing page rather than creating a new one
|
||||
|
||||
### When Content Overlaps
|
||||
|
||||
**Instead of duplicating**:
|
||||
```markdown
|
||||
## Email Configuration
|
||||
|
||||
To send emails, you need to configure SMTP settings:
|
||||
- SMTP_HOST=smtp.example.com
|
||||
- SMTP_PORT=587
|
||||
...
|
||||
```
|
||||
|
||||
**Use references**:
|
||||
```markdown
|
||||
## Email Configuration
|
||||
|
||||
Kener supports email notifications through SMTP or Resend. For detailed configuration instructions, see the [Email Setup](/docs/setup/email-setup) guide.
|
||||
```
|
||||
|
||||
### When to Duplicate vs Reference
|
||||
|
||||
**Duplicate when**:
|
||||
- The information is critical to understanding the current topic
|
||||
- The content is very brief (1-2 sentences)
|
||||
- The duplicate provides necessary context
|
||||
|
||||
**Reference when**:
|
||||
- Detailed configuration steps exist elsewhere
|
||||
- The topic is comprehensively covered in another document
|
||||
- The information would make the current document too long
|
||||
|
||||
## Workflow
|
||||
|
||||
### Creating New Documentation
|
||||
|
||||
1. **Plan the Structure**: Outline sections before writing
|
||||
2. **Check for Existing Content**: Ensure you're not duplicating
|
||||
3. **Write the Content**: Follow the template and guidelines
|
||||
4. **Add Custom Heading IDs**: Include `{#id}` for all major headings
|
||||
5. **Update docs.json**: Add navigation entry
|
||||
6. **Add Internal Links**: Link to related documentation
|
||||
7. **Review and Test**: Verify all links, code examples, and formatting
|
||||
|
||||
### Editing Existing Documentation
|
||||
|
||||
1. **Read the Full Document**: Understand the existing content
|
||||
2. **Maintain Consistency**: Match the existing style and tone
|
||||
3. **Update Related Sections**: Keep all sections consistent
|
||||
4. **Preserve Custom IDs**: Never change existing `{#custom-ids}`
|
||||
5. **Update Links**: Ensure all internal references remain valid
|
||||
|
||||
### Before Finalizing
|
||||
|
||||
**Checklist**:
|
||||
- [ ] Frontmatter includes title and description
|
||||
- [ ] All H2 and H3 headings have custom IDs
|
||||
- [ ] Code blocks specify language
|
||||
- [ ] Tables are properly formatted
|
||||
- [ ] Internal links use `/docs/` prefix
|
||||
- [ ] Examples are practical and tested
|
||||
- [ ] No duplicate content (or justified duplication)
|
||||
- [ ] Navigation entry added to docs.json (for new pages)
|
||||
- [ ] Related documentation is linked
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Configuration Documentation
|
||||
|
||||
Always document environment variables with tables:
|
||||
|
||||
```markdown
|
||||
### Environment Variables {#environment-variables}
|
||||
|
||||
| Variable | Description | Required | Default |
|
||||
| :------- | :---------- | :------- | :------ |
|
||||
| `VAR_NAME` | What it does | Yes/No | `value` |
|
||||
```
|
||||
|
||||
### API Documentation
|
||||
|
||||
Structure API documentation consistently:
|
||||
|
||||
```markdown
|
||||
## Endpoint Name {#endpoint-name}
|
||||
|
||||
Description of what the endpoint does.
|
||||
|
||||
**Endpoint**: `POST /api/path`
|
||||
|
||||
**Authentication**: Required/Not Required
|
||||
|
||||
**Request Body**:
|
||||
```json
|
||||
{
|
||||
"field": "value"
|
||||
}
|
||||
```
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"result": "success"
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
### Monitor Type Documentation
|
||||
|
||||
Follow this structure for monitor types:
|
||||
|
||||
1. Brief introduction
|
||||
2. How it works
|
||||
3. Configuration options (table)
|
||||
4. Examples (multiple, increasing complexity)
|
||||
5. Best practices (with sub-sections)
|
||||
6. Troubleshooting (table format)
|
||||
|
||||
## File Naming Conventions
|
||||
|
||||
- Use lowercase
|
||||
- Use hyphens for spaces: `email-setup.md`
|
||||
- Be descriptive: `creating-managing.md` not `create.md`
|
||||
- Match the slug in docs.json
|
||||
|
||||
## Examples of High-Quality Documentation
|
||||
|
||||
Reference these examples when writing:
|
||||
- `/docs/content/setup/email-setup.md` - Comprehensive setup guide
|
||||
- `/docs/content/monitors/api.md` - Feature documentation with examples
|
||||
- `/docs/content/alerting/overview.md` - Conceptual overview
|
||||
|
||||
## Key Principles
|
||||
|
||||
1. **User-First**: Write for users of all skill levels
|
||||
2. **Scannable**: Use headings, lists, and tables for easy scanning
|
||||
3. **Actionable**: Users should know what to do after reading
|
||||
4. **Maintainable**: Use references to avoid update cascades
|
||||
5. **Accessible**: Clear language, good structure, proper formatting
|
||||
@@ -0,0 +1,86 @@
|
||||
---
|
||||
name: documentation-writer
|
||||
description: Specialized skill for creating and editing high-quality Kener documentation. MUST be used whenever creating or editing documentation files in the src/routes/(docs)/docs/content/ directory or updating docs.json navigation.
|
||||
---
|
||||
|
||||
# Documentation Writer
|
||||
|
||||
Use this skill for all docs edits in `src/routes/(docs)/docs/content/` and when updating docs navigation in `src/routes/(docs)/docs.json`.
|
||||
|
||||
## Non-negotiable rules
|
||||
|
||||
1. **Be concise**: remove repetition and background that does not help the user complete a task.
|
||||
2. **Be actionable**: prioritize “what to do” over theory.
|
||||
3. **One source of truth**: if another page already has details, link to it instead of duplicating.
|
||||
4. **Preserve structure**: keep valid frontmatter and heading anchor IDs.
|
||||
5. **Keep examples copyable**: minimal, tested, and directly relevant.
|
||||
6. **Search before writing**: always check if the content already exists in some form before adding new sections or pages.
|
||||
7. **Check relevant code**: search the codebase inside `src/` for any relevant code, comments, or tests that can inform the documentation content and ensure accuracy.
|
||||
|
||||
## Docs config model (current)
|
||||
|
||||
`docs.json` is versioned. Sidebar lives inside tabs:
|
||||
|
||||
- `versions[].content.navigation.tabs[].sidebar`
|
||||
- Sidebar groups contain `pages`
|
||||
- Each page entry sets its path with the `content` key (the legacy `slug` key may still appear in older entries)
|
||||
|
||||
When adding a new doc page, add its entry to the `sidebar` of the appropriate tab.
|
||||
|
||||
## Versioned link policy (mandatory)
|
||||
|
||||
- For v4 docs content, internal links MUST use explicit v4 paths: `/docs/v4/...`.
|
||||
- Do not use unversioned shortcuts like `/docs/alerting/...` in v4 pages.
|
||||
- Before finalizing, verify every internal link in edited files resolves to the intended version.
|
||||
|
||||
## Required page format
|
||||
|
||||
```markdown
|
||||
---
|
||||
title: Page Title
|
||||
description: One-line summary of user outcome
|
||||
---
|
||||
```
|
||||
|
||||
- Use custom anchors for H2/H3 headings: `## Section {#section}`
|
||||
- Use GitHub admonitions only when needed: `[!NOTE]`, `[!IMPORTANT]`, `[!WARNING]`, `[!CAUTION]`, `[!TIP]`
|
||||
- Prefer short sections and short lists
|
||||
|
||||
## Preferred structure (default)
|
||||
|
||||
1. Short intro (1–2 sentences)
|
||||
2. Quick setup / minimum config
|
||||
3. Required variables/options table
|
||||
4. Verification step
|
||||
5. Top troubleshooting items
|
||||
|
||||
Only add extra sections if they materially improve task completion.
|
||||
|
||||
## Keep docs lean
|
||||
|
||||
Remove or avoid:
|
||||
|
||||
- Multiple near-identical examples
|
||||
- Long conceptual explainers
|
||||
- Platform-by-platform repetition unless behavior differs
|
||||
- Large checklists that restate earlier content
|
||||
|
||||
## Editing workflow
|
||||
|
||||
1. Read the whole target document.
|
||||
2. Compress verbose sections first.
|
||||
3. Keep critical caveats and breaking notes.
|
||||
4. Ensure internal links and anchors still work.
|
||||
5. If adding files, update `docs.json` navigation in the correct version/tab.
|
||||
|
||||
## Review checklist
|
||||
|
||||
- [ ] Title/description frontmatter exists
|
||||
- [ ] Key steps are clear and copyable
|
||||
- [ ] Content is concise and non-duplicative
|
||||
- [ ] Headings keep stable custom anchors
|
||||
- [ ] Navigation updated (if new page)
|
||||
- [ ] Internal links point to correct paths
|
||||
- [ ] v4 pages use `/docs/v4/...` internal links (no unversioned `/docs/...` shortcuts)
|
||||
- [ ] No outdated or irrelevant content remains
|
||||
- [ ] Admonitions used appropriately for important notes
|
||||
@@ -21,3 +21,11 @@ You MUST use this tool whenever writing Svelte code before sending it to the use
|
||||
|
||||
Generates a Svelte Playground link with the provided code.
|
||||
After completing the code, ask the user if they want a playground link. Only call this tool after user confirmation and NEVER if code was written to files in their project.
|
||||
|
||||
## Documentation writing skill
|
||||
|
||||
When the user asks to write or edit documentation, follow the skill file:
|
||||
|
||||
- `.claude/skills/documentation-writer/SKILL.md`
|
||||
|
||||
This is mandatory for docs-related tasks. Prioritize short, clear, action-oriented docs and avoid bloat.
|
||||
|
||||
+64
-6
@@ -28,9 +28,16 @@ const CONTENT_DIR = path.join(__dirname, "../src/routes/(docs)/docs/content");
|
||||
const REDIS_DOCS_KEY = "kener-docs:search:documents";
|
||||
|
||||
/**
 * A page entry exactly as it is written in docs.json, before normalization.
 *
 * The page path is read from `content`; `slug` is the legacy key and is only
 * consulted when `content` is null or undefined (see `normalizePage`).
 * Nested entries are raw docs.json entries as well, so `pages` is typed as
 * `DocsPage[]`: children may carry `content` without `slug`, and
 * `normalizePage` converts them recursively.
 */
interface DocsPage {
  title: string;
  /** Preferred path key. */
  content?: string;
  /** Legacy path key, used as a fallback when `content` is absent. */
  slug?: string;
  /** Child entries, still in raw (un-normalized) form. */
  pages?: DocsPage[];
}
|
||||
|
||||
interface DocsPageSource {
|
||||
title: string;
|
||||
slug: string;
|
||||
pages?: DocsPage[];
|
||||
pages?: DocsPageSource[];
|
||||
}
|
||||
|
||||
interface DocsSidebarGroup {
|
||||
@@ -38,10 +45,26 @@ interface DocsSidebarGroup {
|
||||
pages: DocsPage[];
|
||||
}
|
||||
|
||||
interface DocsConfig {
|
||||
/**
 * A navigation tab of a docs version: its `name` and the sidebar groups it owns.
 *
 * NOTE(review): `sidebar` is required here, while `main` still guards the
 * lookup with `?? []` — confirm whether tabs without a sidebar can occur in
 * docs.json.
 */
interface DocsNavTab {
|
||||
name: string;
|
||||
sidebar: DocsSidebarGroup[];
|
||||
}
|
||||
|
||||
/**
 * One entry of the `versions` array in docs.json.
 *
 * `main` indexes the first version whose `latest` flag is truthy, falling back
 * to the first entry of the array when no version is flagged.
 */
interface DocsVersion {
|
||||
name: string;
|
||||
// Logged by `main` as the identifier of the version being indexed.
slug: string;
|
||||
// Optional marker used by `main` to pick the version to index.
latest?: boolean;
|
||||
content: {
|
||||
// Both levels are optional; `main` treats a missing navigation or tab list as an empty sidebar.
navigation?: {
|
||||
// Only the first tab's sidebar is read by `main`.
tabs?: DocsNavTab[];
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Top-level shape that `main` expects after parsing docs.json: a list of
 * documentation versions.
 */
interface DocsRootConfig {
|
||||
versions: DocsVersion[];
|
||||
}
|
||||
|
||||
interface DocsSearchDocument {
|
||||
id: string;
|
||||
title: string;
|
||||
@@ -86,7 +109,11 @@ function getMarkdownContent(slug: string): string | null {
|
||||
/**
|
||||
* Recursively collect all pages from sidebar groups (including nested pages)
|
||||
*/
|
||||
function collectPages(pages: DocsPage[], group: string, result: Array<{ page: DocsPage; group: string }>): void {
|
||||
function collectPages(
|
||||
pages: DocsPageSource[],
|
||||
group: string,
|
||||
result: Array<{ page: DocsPageSource; group: string }>,
|
||||
): void {
|
||||
for (const page of pages) {
|
||||
result.push({ page, group });
|
||||
if (page.pages && page.pages.length > 0) {
|
||||
@@ -95,6 +122,27 @@ function collectPages(pages: DocsPage[], group: string, result: Array<{ page: Do
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts a raw docs.json page entry into the slug-based `DocsPageSource` shape.
 *
 * The path is taken from `content`, falling back to `slug`. Child pages are
 * normalized recursively.
 *
 * @param page - Page entry as read from docs.json.
 * @returns An object with `title`, `slug` (the resolved path) and `pages`
 *          (normalized children, or `undefined` when the entry has none).
 * @throws Error when the resolved path is missing or empty.
 */
function normalizePage(page: DocsPage): DocsPageSource {
|
||||
// `??` only falls back to `slug` when `content` is null or undefined.
const resolvedPath = page.content ?? page.slug;
|
||||
|
||||
// The falsy check also rejects an empty-string path.
if (!resolvedPath) {
|
||||
throw new Error(`[index-docs] Page \"${page.title}\" must define content or slug`);
|
||||
}
|
||||
|
||||
return {
|
||||
title: page.title,
|
||||
slug: resolvedPath,
|
||||
pages: page.pages?.map(normalizePage),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Normalizes every page of every sidebar group with `normalizePage`.
 *
 * @param sidebar - Sidebar groups as read from docs.json.
 * @returns One object per group containing only `group` (the group name) and
 *          its normalized `pages`; other group properties are not copied.
 * @throws Error propagated from `normalizePage` when a page has no usable path.
 */
function normalizeSidebar(sidebar: DocsSidebarGroup[]): Array<{ group: string; pages: DocsPageSource[] }> {
|
||||
return sidebar.map((group) => ({
|
||||
group: group.group,
|
||||
pages: group.pages.map(normalizePage),
|
||||
}));
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
// Validate Redis URL
|
||||
if (!process.env.REDIS_URL) {
|
||||
@@ -108,15 +156,25 @@ async function main(): Promise<void> {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const config: DocsConfig = JSON.parse(fs.readFileSync(DOCS_JSON_PATH, "utf-8"));
|
||||
const config: DocsRootConfig = JSON.parse(fs.readFileSync(DOCS_JSON_PATH, "utf-8"));
|
||||
const latestVersion = config.versions.find((version) => version.latest) ?? config.versions[0];
|
||||
|
||||
if (!latestVersion) {
|
||||
console.error("[index-docs] No versions found in docs.json");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const primaryTabSidebar = latestVersion.content.navigation?.tabs?.[0]?.sidebar ?? [];
|
||||
const sidebar = normalizeSidebar(primaryTabSidebar);
|
||||
const documents: DocsSearchDocument[] = [];
|
||||
|
||||
// Collect all pages from sidebar
|
||||
const allPages: Array<{ page: DocsPage; group: string }> = [];
|
||||
for (const sidebarGroup of config.sidebar) {
|
||||
const allPages: Array<{ page: DocsPageSource; group: string }> = [];
|
||||
for (const sidebarGroup of sidebar) {
|
||||
collectPages(sidebarGroup.pages, sidebarGroup.group, allPages);
|
||||
}
|
||||
|
||||
console.log(`[index-docs] Indexing version ${latestVersion.slug}`);
|
||||
console.log(`[index-docs] Found ${allPages.length} pages to index`);
|
||||
|
||||
for (const { page, group } of allPages) {
|
||||
|
||||
+8
-8
@@ -1,4 +1,4 @@
|
||||
export const DefaultAPIEval = `(async function (statusCode, responseTime, responseRaw, modules) {
|
||||
export const DefaultAPIEval = `async function (statusCode, responseTime, responseRaw, modules) {
|
||||
let statusCodeShort = Math.floor(statusCode/100);
|
||||
if(statusCode == 429 || (statusCodeShort >=2 && statusCodeShort <= 3)) {
|
||||
return {
|
||||
@@ -10,9 +10,9 @@ export const DefaultAPIEval = `(async function (statusCode, responseTime, respon
|
||||
status: 'DOWN',
|
||||
latency: responseTime,
|
||||
}
|
||||
})`;
|
||||
}`;
|
||||
|
||||
export const DefaultPingEval = `(async function (arrayOfPings) {
|
||||
export const DefaultPingEval = `async function (arrayOfPings) {
|
||||
let latencyTotal = arrayOfPings.reduce((acc, ping) => {
|
||||
return acc + ping.latency;
|
||||
}, 0);
|
||||
@@ -25,9 +25,9 @@ export const DefaultPingEval = `(async function (arrayOfPings) {
|
||||
status: alive ? 'UP' : 'DOWN',
|
||||
latency: latencyTotal / arrayOfPings.length,
|
||||
}
|
||||
})`;
|
||||
}`;
|
||||
|
||||
export const DefaultTCPEval = `(async function (arrayOfPings) {
|
||||
export const DefaultTCPEval = `async function (arrayOfPings) {
|
||||
let latencyTotal = arrayOfPings.reduce((acc, ping) => {
|
||||
return acc + ping.latency;
|
||||
}, 0);
|
||||
@@ -40,14 +40,14 @@ export const DefaultTCPEval = `(async function (arrayOfPings) {
|
||||
status: alive ? 'UP' : 'DOWN',
|
||||
latency: latencyTotal / arrayOfPings.length,
|
||||
}
|
||||
})`;
|
||||
}`;
|
||||
|
||||
export const DefaultGamedigEval = `(async function (responseTime, responseRaw) {
|
||||
export const DefaultGamedigEval = `async function (responseTime, responseRaw) {
|
||||
return {
|
||||
status: 'UP',
|
||||
latency: responseTime,
|
||||
}
|
||||
})`;
|
||||
}`;
|
||||
export const GAMEDIG_TIMEOUT = 10 * 1000; // 10 seconds
|
||||
export const GAMEDIG_SOCKET_TIMEOUT = 2 * 1000; // 2 seconds
|
||||
|
||||
|
||||
@@ -329,6 +329,8 @@ export const SendInvitationEmail = async (email: string, role: string, name: str
|
||||
template.template_subject || "Your Invitation to Join",
|
||||
emailVars,
|
||||
[email],
|
||||
undefined,
|
||||
template.template_text_body || "",
|
||||
);
|
||||
}
|
||||
};
|
||||
@@ -375,6 +377,8 @@ export const ResendInvitationEmail = async (email: string, currentUserRole: stri
|
||||
template.template_subject || "Your Invitation to Join",
|
||||
emailVars,
|
||||
[email],
|
||||
undefined,
|
||||
template.template_text_body || "",
|
||||
);
|
||||
}
|
||||
};
|
||||
@@ -419,7 +423,12 @@ export const SendVerificationEmail = async (toUserId: number, currentUser: { id:
|
||||
if (!template) {
|
||||
throw new Error("Verify email template not found");
|
||||
}
|
||||
await sendEmail(template.template_html_body || "", template.template_subject || "Verify Your Email", emailVars, [
|
||||
user.email,
|
||||
]);
|
||||
await sendEmail(
|
||||
template.template_html_body || "",
|
||||
template.template_subject || "Verify Your Email",
|
||||
emailVars,
|
||||
[user.email],
|
||||
undefined,
|
||||
template.template_text_body || "",
|
||||
);
|
||||
};
|
||||
|
||||
@@ -476,6 +476,8 @@ export async function SubscriberLogin(email: string): Promise<{ success: boolean
|
||||
template.template_subject || "Your Verification Code",
|
||||
emailVars,
|
||||
[normalizedEmail],
|
||||
undefined,
|
||||
template.template_text_body || "",
|
||||
);
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
|
||||
@@ -13,22 +13,26 @@ export default async function send(
|
||||
variables: Record<string, string | number | boolean>,
|
||||
to: string[],
|
||||
from?: string,
|
||||
emailTextBody?: string,
|
||||
) {
|
||||
// Implementation for sending email notification using the provided triggerRecord, variables, and template
|
||||
|
||||
let envSecretsTemplate = GetRequiredSecrets(emailBody + emailSubject);
|
||||
let envSecretsTemplate = GetRequiredSecrets(emailBody + emailSubject + (emailTextBody || ""));
|
||||
|
||||
for (let i = 0; i < envSecretsTemplate.length; i++) {
|
||||
const secret = envSecretsTemplate[i];
|
||||
if (secret.replace !== undefined) {
|
||||
emailBody = ReplaceAllOccurrences(emailBody, secret.find, secret.replace);
|
||||
emailSubject = ReplaceAllOccurrences(emailSubject, secret.find, secret.replace);
|
||||
if (emailTextBody !== undefined) {
|
||||
emailTextBody = ReplaceAllOccurrences(emailTextBody, secret.find, secret.replace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const subject = Mustache.render(emailSubject, variables);
|
||||
const htmlBody = Mustache.render(emailBody, variables);
|
||||
const textBody = striptags(htmlBody);
|
||||
const textBody = emailTextBody ? Mustache.render(emailTextBody, variables) : striptags(htmlBody);
|
||||
|
||||
try {
|
||||
let isEmailSetupDone = IsEmailSetup();
|
||||
|
||||
@@ -18,6 +18,7 @@ export interface EmailJobData {
|
||||
toEmails: string[];
|
||||
templateHtmlBody: string;
|
||||
templateSubject: string;
|
||||
templateTextBody?: string;
|
||||
variables: Record<string, string | number | boolean>;
|
||||
fromEmail?: string;
|
||||
}
|
||||
@@ -33,7 +34,8 @@ const addWorker = () => {
|
||||
if (worker) return worker;
|
||||
|
||||
worker = q.createWorker(getQueue(), async (job: Job): Promise<void> => {
|
||||
const { toEmails, templateHtmlBody, templateSubject, variables, fromEmail } = job.data as EmailJobData;
|
||||
const { toEmails, templateHtmlBody, templateSubject, templateTextBody, variables, fromEmail } =
|
||||
job.data as EmailJobData;
|
||||
|
||||
try {
|
||||
await sendEmail(
|
||||
@@ -42,6 +44,7 @@ const addWorker = () => {
|
||||
variables,
|
||||
toEmails, // Single recipient array
|
||||
fromEmail,
|
||||
templateTextBody,
|
||||
);
|
||||
console.log(`📧 Email sent to ${toEmails}`);
|
||||
} catch (error) {
|
||||
|
||||
@@ -63,6 +63,7 @@ const addWorker = () => {
|
||||
toEmails: [email],
|
||||
templateHtmlBody: template.template_html_body || "",
|
||||
templateSubject: template.template_subject || "Event Update",
|
||||
templateTextBody: template.template_text_body || "",
|
||||
variables: emailVars,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -9,7 +9,10 @@ export interface DocsLogo {
|
||||
|
||||
export interface DocsNavTab {
|
||||
name: string;
|
||||
url: string;
|
||||
url?: string;
|
||||
key?: string;
|
||||
firstPageSlug?: string | null;
|
||||
sidebar?: DocsSidebarGroupSource[];
|
||||
}
|
||||
|
||||
export interface DocsNavigation {
|
||||
@@ -56,7 +59,6 @@ export interface DocsVersionMeta {
|
||||
|
||||
export interface DocsVersionContent {
|
||||
navigation?: DocsNavigation;
|
||||
sidebar: DocsSidebarGroupSource[];
|
||||
footerLinks?: DocsFooterLink[];
|
||||
}
|
||||
|
||||
@@ -82,6 +84,7 @@ export interface DocsConfig {
|
||||
footerLinks?: DocsFooterLink[];
|
||||
versions?: DocsVersionMeta[];
|
||||
activeVersion?: string | null;
|
||||
activeTabKey?: string | null;
|
||||
}
|
||||
|
||||
export interface DocsTableOfContentsItem {
|
||||
|
||||
@@ -49,6 +49,8 @@ export const POST: RequestHandler = async ({ request }) => {
|
||||
template.template_subject || "Your Password Reset Request",
|
||||
emailVars,
|
||||
[email],
|
||||
undefined,
|
||||
template.template_text_body || "",
|
||||
);
|
||||
return json({ success: true });
|
||||
} catch (error) {
|
||||
|
||||
+382
-365
@@ -16,7 +16,230 @@
|
||||
"tabs": [
|
||||
{
|
||||
"name": "Documentation",
|
||||
"url": "/docs/v4/getting-started/introduction"
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Getting Started",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Introduction",
|
||||
"content": "v4/getting-started/introduction"
|
||||
},
|
||||
{
|
||||
"title": "Quick Start",
|
||||
"content": "v4/getting-started/quick-start"
|
||||
},
|
||||
{
|
||||
"title": "Basic Setup",
|
||||
"content": "v4/getting-started/basic-setup"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Setup Guide",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Site Configuration",
|
||||
"content": "v4/setup/site-configuration"
|
||||
},
|
||||
{
|
||||
"title": "Customizations",
|
||||
"content": "v4/setup/customizations"
|
||||
},
|
||||
{
|
||||
"title": "Environment Variables",
|
||||
"content": "v4/setup/environment-variables"
|
||||
},
|
||||
{
|
||||
"title": "Email Setup",
|
||||
"content": "v4/setup/email-setup"
|
||||
},
|
||||
{
|
||||
"title": "Database Setup",
|
||||
"content": "v4/setup/database-setup"
|
||||
},
|
||||
{
|
||||
"title": "Redis Setup",
|
||||
"content": "v4/setup/redis-setup"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Core Concepts",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Monitors",
|
||||
"content": "v4/monitors",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/monitors/overview"
|
||||
},
|
||||
{
|
||||
"title": "API Monitors",
|
||||
"content": "v4/monitors/api"
|
||||
},
|
||||
{
|
||||
"title": "Ping Monitors",
|
||||
"content": "v4/monitors/ping"
|
||||
},
|
||||
{
|
||||
"title": "TCP Monitors",
|
||||
"content": "v4/monitors/tcp"
|
||||
},
|
||||
{
|
||||
"title": "DNS Monitors",
|
||||
"content": "v4/monitors/dns"
|
||||
},
|
||||
{
|
||||
"title": "SSL Monitors",
|
||||
"content": "v4/monitors/ssl"
|
||||
},
|
||||
{
|
||||
"title": "SQL Monitors",
|
||||
"content": "v4/monitors/sql"
|
||||
},
|
||||
{
|
||||
"title": "Heartbeat Monitors",
|
||||
"content": "v4/monitors/heartbeat"
|
||||
},
|
||||
{
|
||||
"title": "GameDig Monitors",
|
||||
"content": "v4/monitors/gamedig"
|
||||
},
|
||||
{
|
||||
"title": "Group Monitors",
|
||||
"content": "v4/monitors/group"
|
||||
},
|
||||
{
|
||||
"title": "Sharing Monitors",
|
||||
"content": "v4/sharing"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Alerting",
|
||||
"content": "v4/alerting",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/alerting/overview"
|
||||
},
|
||||
{
|
||||
"title": "Alert Configurations",
|
||||
"content": "v4/alerting/alert-configurations"
|
||||
},
|
||||
{
|
||||
"title": "Triggers",
|
||||
"content": "v4/alerting/triggers"
|
||||
},
|
||||
{
|
||||
"title": "Templates",
|
||||
"content": "v4/alerting/templates"
|
||||
},
|
||||
{
|
||||
"title": "Webhook Examples",
|
||||
"content": "v4/alerting/webhook-examples"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Incidents",
|
||||
"content": "v4/incidents",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/incidents/overview"
|
||||
},
|
||||
{
|
||||
"title": "Creating and Managing",
|
||||
"content": "v4/incidents/creating-managing"
|
||||
},
|
||||
{
|
||||
"title": "Impact on Monitoring",
|
||||
"content": "v4/incidents/impact-on-monitoring"
|
||||
},
|
||||
{
|
||||
"title": "Auto-Generated Incidents",
|
||||
"content": "v4/incidents/auto-generated"
|
||||
},
|
||||
{
|
||||
"title": "Incident Updates",
|
||||
"content": "v4/incidents/updates"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Maintenances",
|
||||
"content": "v4/maintenances",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/maintenances/overview"
|
||||
},
|
||||
{
|
||||
"title": "Creating and Managing",
|
||||
"content": "v4/maintenances/creating-managing"
|
||||
},
|
||||
{
|
||||
"title": "Maintenance Events",
|
||||
"content": "v4/maintenances/events"
|
||||
},
|
||||
{
|
||||
"title": "Impact on Monitoring",
|
||||
"content": "v4/maintenances/impact-on-monitoring"
|
||||
},
|
||||
{
|
||||
"title": "RRULE Patterns",
|
||||
"content": "v4/maintenances/rrule-patterns"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "User Subscriptions",
|
||||
"content": "v4/subscriptions"
|
||||
},
|
||||
{
|
||||
"title": "User Management",
|
||||
"content": "v4/user-management"
|
||||
},
|
||||
|
||||
{
|
||||
"title": "Pages",
|
||||
"content": "v4/pages"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Guides",
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Guides",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Email Templates",
|
||||
"content": "v4/guides/email-templates"
|
||||
},
|
||||
{
|
||||
"title": "API Custom Eval Examples",
|
||||
"content": "v4/guides/api-custom-eval-examples"
|
||||
},
|
||||
{
|
||||
"title": "Alerting Trigger Examples",
|
||||
"content": "v4/guides/alerting-trigger-examples"
|
||||
},
|
||||
{
|
||||
"title": "Reverse Proxy Setup",
|
||||
"content": "v4/guides/reverse-proxy"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "API Reference",
|
||||
@@ -24,213 +247,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Getting Started",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Introduction",
|
||||
"content": "v4/getting-started/introduction"
|
||||
},
|
||||
{
|
||||
"title": "Quick Start",
|
||||
"content": "v4/getting-started/quick-start"
|
||||
},
|
||||
{
|
||||
"title": "Installation",
|
||||
"content": "v4/installation"
|
||||
},
|
||||
{
|
||||
"title": "Architecture",
|
||||
"content": "v4/architecture"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Setup Guide",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Environment Variables",
|
||||
"content": "v4/setup/environment-variables"
|
||||
},
|
||||
{
|
||||
"title": "Email Setup",
|
||||
"content": "v4/setup/email-setup"
|
||||
},
|
||||
{
|
||||
"title": "Database Setup",
|
||||
"content": "v4/setup/database-setup"
|
||||
},
|
||||
{
|
||||
"title": "Redis Setup",
|
||||
"content": "v4/setup/redis-setup"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Core Concepts",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Monitors",
|
||||
"content": "v4/monitors",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/monitors/overview"
|
||||
},
|
||||
{
|
||||
"title": "API Monitors",
|
||||
"content": "v4/monitors/api"
|
||||
},
|
||||
{
|
||||
"title": "Ping Monitors",
|
||||
"content": "v4/monitors/ping"
|
||||
},
|
||||
{
|
||||
"title": "TCP Monitors",
|
||||
"content": "v4/monitors/tcp"
|
||||
},
|
||||
{
|
||||
"title": "DNS Monitors",
|
||||
"content": "v4/monitors/dns"
|
||||
},
|
||||
{
|
||||
"title": "SSL Monitors",
|
||||
"content": "v4/monitors/ssl"
|
||||
},
|
||||
{
|
||||
"title": "SQL Monitors",
|
||||
"content": "v4/monitors/sql"
|
||||
},
|
||||
{
|
||||
"title": "Heartbeat Monitors",
|
||||
"content": "v4/monitors/heartbeat"
|
||||
},
|
||||
{
|
||||
"title": "GameDig Monitors",
|
||||
"content": "v4/monitors/gamedig"
|
||||
},
|
||||
{
|
||||
"title": "Group Monitors",
|
||||
"content": "v4/monitors/group"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Alerting",
|
||||
"content": "v4/alerting",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/alerting/overview"
|
||||
},
|
||||
{
|
||||
"title": "Alert Configurations",
|
||||
"content": "v4/alerting/alert-configurations"
|
||||
},
|
||||
{
|
||||
"title": "Triggers",
|
||||
"content": "v4/alerting/triggers"
|
||||
},
|
||||
{
|
||||
"title": "Templates",
|
||||
"content": "v4/alerting/templates"
|
||||
},
|
||||
{
|
||||
"title": "Webhook Examples",
|
||||
"content": "v4/alerting/webhook-examples"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Incidents",
|
||||
"content": "v4/incidents",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/incidents/overview"
|
||||
},
|
||||
{
|
||||
"title": "Creating and Managing",
|
||||
"content": "v4/incidents/creating-managing"
|
||||
},
|
||||
{
|
||||
"title": "Incident Updates",
|
||||
"content": "v4/incidents/updates"
|
||||
},
|
||||
{
|
||||
"title": "Impact on Monitoring",
|
||||
"content": "v4/incidents/impact-on-monitoring"
|
||||
},
|
||||
{
|
||||
"title": "Auto-Generated Incidents",
|
||||
"content": "v4/incidents/auto-generated"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Maintenances",
|
||||
"content": "v4/maintenances",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/maintenances/overview"
|
||||
},
|
||||
{
|
||||
"title": "Creating and Managing",
|
||||
"content": "v4/maintenances/creating-managing"
|
||||
},
|
||||
{
|
||||
"title": "Maintenance Events",
|
||||
"content": "v4/maintenances/events"
|
||||
},
|
||||
{
|
||||
"title": "Impact on Monitoring",
|
||||
"content": "v4/maintenances/impact-on-monitoring"
|
||||
},
|
||||
{
|
||||
"title": "RRULE Patterns",
|
||||
"content": "v4/maintenances/rrule-patterns"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"title": "Configuration",
|
||||
"content": "v4/configuration"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Advanced Topics",
|
||||
"collapsible": true,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Reverse Proxy Setup",
|
||||
"content": "v4/advanced-topics/reverse-proxy"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "API Reference",
|
||||
"pages": [
|
||||
{
|
||||
"title": "Overview",
|
||||
"content": "v4/api-reference"
|
||||
},
|
||||
{
|
||||
"title": "Authentication",
|
||||
"content": "v4/api-reference/authentication"
|
||||
},
|
||||
{
|
||||
"title": "Monitors API",
|
||||
"content": "v4/api-reference/monitors"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
"footerLinks": [
|
||||
{
|
||||
"name": "GitHub",
|
||||
@@ -252,167 +269,167 @@
|
||||
"tabs": [
|
||||
{
|
||||
"name": "Documentation",
|
||||
"url": "/docs"
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Getting Started",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Introduction",
|
||||
"content": "v3/home"
|
||||
},
|
||||
{
|
||||
"title": "Get Started",
|
||||
"content": "v3/quick-start"
|
||||
},
|
||||
{
|
||||
"title": "Concepts",
|
||||
"content": "v3/concepts"
|
||||
},
|
||||
{
|
||||
"title": "Deployment",
|
||||
"content": "v3/deployment"
|
||||
},
|
||||
{
|
||||
"title": "Databases",
|
||||
"content": "v3/database"
|
||||
},
|
||||
{
|
||||
"title": "Access Control",
|
||||
"content": "v3/rbac"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Guides",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Setup Environment",
|
||||
"content": "v3/environment-vars"
|
||||
},
|
||||
{
|
||||
"title": "Use Badges",
|
||||
"content": "v3/status-badges"
|
||||
},
|
||||
{
|
||||
"title": "Setup Monitors",
|
||||
"content": "v3/monitors"
|
||||
},
|
||||
{
|
||||
"title": "Setup Triggers",
|
||||
"content": "v3/triggers"
|
||||
},
|
||||
{
|
||||
"title": "Setup Site",
|
||||
"content": "v3/site"
|
||||
},
|
||||
{
|
||||
"title": "Setup SEO",
|
||||
"content": "v3/seo"
|
||||
},
|
||||
{
|
||||
"title": "Setup Home",
|
||||
"content": "v3/home-page"
|
||||
},
|
||||
{
|
||||
"title": "Setup Theme",
|
||||
"content": "v3/theme"
|
||||
},
|
||||
|
||||
{
|
||||
"title": "API Keys",
|
||||
"content": "v3/apikeys"
|
||||
},
|
||||
{
|
||||
"title": "Incident Management",
|
||||
"content": "v3/incident-management"
|
||||
},
|
||||
{
|
||||
"title": "Embed",
|
||||
"content": "v3/embed"
|
||||
},
|
||||
{
|
||||
"title": "Custom JS/CSS",
|
||||
"content": "v3/custom-js-css-guide"
|
||||
},
|
||||
{
|
||||
"title": "Internationalization",
|
||||
"content": "v3/i18n"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Monitors",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "API/Website Monitor",
|
||||
"content": "v3/monitors-api"
|
||||
},
|
||||
{
|
||||
"title": "Ping Monitor",
|
||||
"content": "v3/monitors-ping"
|
||||
},
|
||||
{
|
||||
"title": "TCP Monitor",
|
||||
"content": "v3/monitors-tcp"
|
||||
},
|
||||
{
|
||||
"title": "DNS Monitor",
|
||||
"content": "v3/monitors-dns"
|
||||
},
|
||||
{
|
||||
"title": "Group Monitor",
|
||||
"content": "v3/monitors-group"
|
||||
},
|
||||
{
|
||||
"title": "SSL Monitor",
|
||||
"content": "v3/monitors-ssl"
|
||||
},
|
||||
{
|
||||
"title": "SQL Monitor",
|
||||
"content": "v3/monitors-sql"
|
||||
},
|
||||
{
|
||||
"title": "Heartbeat Monitor",
|
||||
"content": "v3/monitors-heartbeat"
|
||||
},
|
||||
{
|
||||
"title": "Gamedig Monitor",
|
||||
"content": "v3/monitors-gamedig"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "API Reference",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Kener APIs",
|
||||
"content": "v3/kener-apis"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Help",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Fonts",
|
||||
"content": "v3/custom-fonts"
|
||||
},
|
||||
{
|
||||
"title": "Changelogs",
|
||||
"content": "v3/changelogs"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"sidebar": [
|
||||
{
|
||||
"group": "Getting Started",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Introduction",
|
||||
"content": "v3/home"
|
||||
},
|
||||
{
|
||||
"title": "Get Started",
|
||||
"content": "v3/quick-start"
|
||||
},
|
||||
{
|
||||
"title": "Concepts",
|
||||
"content": "v3/concepts"
|
||||
},
|
||||
{
|
||||
"title": "Deployment",
|
||||
"content": "v3/deployment"
|
||||
},
|
||||
{
|
||||
"title": "Databases",
|
||||
"content": "v3/database"
|
||||
},
|
||||
{
|
||||
"title": "Access Control",
|
||||
"content": "v3/rbac"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Guides",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Setup Environment",
|
||||
"content": "v3/environment-vars"
|
||||
},
|
||||
{
|
||||
"title": "Use Badges",
|
||||
"content": "v3/status-badges"
|
||||
},
|
||||
{
|
||||
"title": "Setup Monitors",
|
||||
"content": "v3/monitors"
|
||||
},
|
||||
{
|
||||
"title": "Setup Triggers",
|
||||
"content": "v3/triggers"
|
||||
},
|
||||
{
|
||||
"title": "Setup Site",
|
||||
"content": "v3/site"
|
||||
},
|
||||
{
|
||||
"title": "Setup SEO",
|
||||
"content": "v3/seo"
|
||||
},
|
||||
{
|
||||
"title": "Setup Home",
|
||||
"content": "v3/home-page"
|
||||
},
|
||||
{
|
||||
"title": "Setup Theme",
|
||||
"content": "v3/theme"
|
||||
},
|
||||
|
||||
{
|
||||
"title": "API Keys",
|
||||
"content": "v3/apikeys"
|
||||
},
|
||||
{
|
||||
"title": "Incident Management",
|
||||
"content": "v3/incident-management"
|
||||
},
|
||||
{
|
||||
"title": "Embed",
|
||||
"content": "v3/embed"
|
||||
},
|
||||
{
|
||||
"title": "Custom JS/CSS",
|
||||
"content": "v3/custom-js-css-guide"
|
||||
},
|
||||
{
|
||||
"title": "Internationalization",
|
||||
"content": "v3/i18n"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Monitors",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "API/Website Monitor",
|
||||
"content": "v3/monitors-api"
|
||||
},
|
||||
{
|
||||
"title": "Ping Monitor",
|
||||
"content": "v3/monitors-ping"
|
||||
},
|
||||
{
|
||||
"title": "TCP Monitor",
|
||||
"content": "v3/monitors-tcp"
|
||||
},
|
||||
{
|
||||
"title": "DNS Monitor",
|
||||
"content": "v3/monitors-dns"
|
||||
},
|
||||
{
|
||||
"title": "Group Monitor",
|
||||
"content": "v3/monitors-group"
|
||||
},
|
||||
{
|
||||
"title": "SSL Monitor",
|
||||
"content": "v3/monitors-ssl"
|
||||
},
|
||||
{
|
||||
"title": "SQL Monitor",
|
||||
"content": "v3/monitors-sql"
|
||||
},
|
||||
{
|
||||
"title": "Heartbeat Monitor",
|
||||
"content": "v3/monitors-heartbeat"
|
||||
},
|
||||
{
|
||||
"title": "Gamedig Monitor",
|
||||
"content": "v3/monitors-gamedig"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "API Reference",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Kener APIs",
|
||||
"content": "v3/kener-apis"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"group": "Help",
|
||||
"collapsible": false,
|
||||
"pages": [
|
||||
{
|
||||
"title": "Fonts",
|
||||
"content": "v3/custom-fonts"
|
||||
},
|
||||
{
|
||||
"title": "Changelogs",
|
||||
"content": "v3/changelogs"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"footerLinks": [
|
||||
{
|
||||
"name": "GitHub",
|
||||
|
||||
@@ -7,7 +7,15 @@
|
||||
"definitions": {
|
||||
"navTab": {
|
||||
"type": "object",
|
||||
"required": ["name", "url"],
|
||||
"required": ["name"],
|
||||
"anyOf": [
|
||||
{
|
||||
"required": ["sidebar"]
|
||||
},
|
||||
{
|
||||
"required": ["url"]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
@@ -15,7 +23,14 @@
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL path for the tab"
|
||||
"description": "Optional redirect URL for tabs that navigate away instead of rendering a sidebar"
|
||||
},
|
||||
"sidebar": {
|
||||
"type": "array",
|
||||
"description": "Sidebar navigation groups for this tab",
|
||||
"items": {
|
||||
"$ref": "#/definitions/sidebarGroup"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -103,18 +118,11 @@
|
||||
},
|
||||
"versionContent": {
|
||||
"type": "object",
|
||||
"required": ["sidebar"],
|
||||
"required": ["navigation"],
|
||||
"properties": {
|
||||
"navigation": {
|
||||
"$ref": "#/definitions/navigation"
|
||||
},
|
||||
"sidebar": {
|
||||
"type": "array",
|
||||
"description": "Sidebar navigation groups",
|
||||
"items": {
|
||||
"$ref": "#/definitions/sidebarGroup"
|
||||
}
|
||||
},
|
||||
"footerLinks": {
|
||||
"type": "array",
|
||||
"description": "Links displayed in the footer/navbar",
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
<!-- Sidebar -->
|
||||
<aside
|
||||
class="bg-background fixed top-24 bottom-0 left-0 z-40 w-[240px] -translate-x-full overflow-y-auto transition-transform duration-300 ease-in-out lg:translate-x-0"
|
||||
class="bg-background scrollbar-hidden fixed top-24 bottom-0 left-0 z-40 w-[240px] -translate-x-full overflow-y-auto transition-transform duration-300 ease-in-out lg:translate-x-0"
|
||||
class:translate-x-0={isMobileMenuOpen}
|
||||
>
|
||||
<DocsSidebar config={data.config} currentSlug={data.currentSlug} onNavigate={closeMobileMenu} />
|
||||
|
||||
@@ -21,7 +21,7 @@ export const load: PageServerLoad = async ({ params }) => {
|
||||
}
|
||||
|
||||
const requestedVersion = versionSlug;
|
||||
const config = getDocsConfig(requestedVersion);
|
||||
const config = getDocsConfig(requestedVersion, undefined, pageSlug);
|
||||
const slug = resolvePageSlugForConfig(pageSlug, config, requestedVersion);
|
||||
|
||||
if (!pageSlug) {
|
||||
|
||||
@@ -8,9 +8,10 @@ export const load: LayoutServerLoad = async ({ params, url, parent }) => {
|
||||
const pathParts = url.pathname.split("/docs/");
|
||||
const pathAfterDocs = pathParts[1] || "";
|
||||
const { versionSlug, pageSlug } = resolveVersionedDocsSlug(pathAfterDocs);
|
||||
const requestedTab = url.searchParams.get("tab") ?? undefined;
|
||||
|
||||
const requestedVersion = versionSlug;
|
||||
const config = getDocsConfig(requestedVersion);
|
||||
const config = getDocsConfig(requestedVersion, requestedTab, pageSlug);
|
||||
const currentSlug = resolvePageSlugForConfig(pageSlug, config, requestedVersion) ?? pageSlug;
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
<script lang="ts">
|
||||
import type { DocsConfig } from "$lib/types/docs";
|
||||
|
||||
import { browser } from "$app/environment";
|
||||
import { goto } from "$app/navigation";
|
||||
import Sun from "@lucide/svelte/icons/sun";
|
||||
import Moon from "@lucide/svelte/icons/moon";
|
||||
@@ -23,11 +24,15 @@
|
||||
let { config, currentSlug, onMenuToggle, isMobileMenuOpen = false }: Props = $props();
|
||||
let searchOpen = $state(false);
|
||||
|
||||
function isActiveTab(url: string): boolean {
|
||||
if (url === "/docs") {
|
||||
return currentSlug === "" || !currentSlug.startsWith("api-reference");
|
||||
}
|
||||
return currentSlug.startsWith(url.replace("/docs/", ""));
|
||||
type DocsNavTab = NonNullable<DocsConfig["navigation"]>["tabs"][number];
|
||||
|
||||
function getDefaultTabKey(): string | null {
|
||||
const tabs = config.navigation?.tabs ?? [];
|
||||
return tabs.find((tab) => (tab.sidebar?.length ?? 0) > 0)?.key ?? tabs[0]?.key ?? null;
|
||||
}
|
||||
|
||||
function getActiveTabKey(): string | null {
|
||||
return config.activeTabKey ?? getDefaultTabKey();
|
||||
}
|
||||
|
||||
function getVersionHref(versionSlug: string): string {
|
||||
@@ -62,6 +67,38 @@
|
||||
await goto(getVersionHref(versionSlug));
|
||||
}
|
||||
|
||||
function getTabHref(tab: DocsNavTab): string {
|
||||
if (tab.url) {
|
||||
return tab.url;
|
||||
}
|
||||
|
||||
const activeVersion = config.activeVersion;
|
||||
const firstPageSlug = tab.firstPageSlug;
|
||||
|
||||
if (!activeVersion) {
|
||||
return "/docs";
|
||||
}
|
||||
|
||||
if (!firstPageSlug) {
|
||||
return `/docs/${activeVersion}`;
|
||||
}
|
||||
|
||||
const normalizedFirstPageSlug = firstPageSlug.startsWith(`${activeVersion}/`)
|
||||
? firstPageSlug.slice(activeVersion.length + 1)
|
||||
: firstPageSlug;
|
||||
|
||||
return `/docs/${activeVersion}/${normalizedFirstPageSlug}`;
|
||||
}
|
||||
|
||||
async function selectTab(tab: DocsNavTab) {
|
||||
if (tab.url) {
|
||||
window.location.href = tab.url;
|
||||
return;
|
||||
}
|
||||
|
||||
await goto(getTabHref(tab));
|
||||
}
|
||||
|
||||
function openSearch() {
|
||||
searchOpen = true;
|
||||
}
|
||||
@@ -168,16 +205,17 @@
|
||||
</div>
|
||||
|
||||
<!-- Sub Navbar with Tabs -->
|
||||
{#if config.navigation?.tabs}
|
||||
{#if config.navigation?.tabs && config.navigation.tabs.length > 1}
|
||||
<div class="border-border/50 px-0">
|
||||
<nav class="mx-auto flex h-10 items-center gap-1 px-4">
|
||||
{#each config.navigation.tabs as tab (tab.url)}
|
||||
<nav class="mx-auto flex h-10 items-center gap-2 px-4">
|
||||
{#each config.navigation.tabs as tab, index (`${tab.name}-${index}`)}
|
||||
<Button
|
||||
href={tab.url}
|
||||
rel="external"
|
||||
variant={!isActiveTab(tab.url) ? "ghost" : "secondary"}
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class=""
|
||||
class="rounded-none border-0 {tab.key === getActiveTabKey()
|
||||
? 'border-b-accent-foreground! border-b!'
|
||||
: ''}"
|
||||
onclick={() => selectTab(tab)}
|
||||
>
|
||||
{tab.name}
|
||||
</Button>
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
|
||||
{#if items.length > 0}
|
||||
<aside
|
||||
class="sticky top-[calc(96px+2rem)] hidden max-h-[calc(100vh-96px-4rem)] w-[220px] shrink-0 overflow-y-auto xl:block"
|
||||
class="scrollbar-hidden sticky top-[calc(96px+2rem)] hidden max-h-[calc(100vh-96px-4rem)] w-[220px] shrink-0 overflow-y-auto xl:block"
|
||||
>
|
||||
<div class="text-muted-foreground mb-3 text-xs font-semibold tracking-wide uppercase">On this page</div>
|
||||
<nav>
|
||||
|
||||
@@ -1,401 +1,67 @@
|
||||
---
|
||||
title: "Alert Configurations"
|
||||
description: "Learn how to create and manage alert configurations that define when and how alerts are triggered in Kener."
|
||||
description: "Create alert rules with thresholds, severity, and trigger routing"
|
||||
---
|
||||
|
||||
Alert configurations define the rules that determine when alerts should be triggered. Each configuration monitors a specific metric on a monitor and sends notifications through configured triggers when conditions are met.
|
||||
Alert configurations define when notifications should be sent for a monitor.
|
||||
|
||||
## Accessing Alerts {#accessing-alerts}
|
||||
## Required settings {#required-settings}
|
||||
|
||||
Navigate to **Manage > Alerts** or visit `/manage/app/alerts` to view all your alert configurations.
|
||||
Each configuration needs:
|
||||
|
||||
The alerts page shows:
|
||||
- monitor
|
||||
- alert type (`STATUS`, `LATENCY`, or `UPTIME`)
|
||||
- alert value
|
||||
- failure threshold
|
||||
- success threshold
|
||||
- at least one trigger
|
||||
|
||||
- All configured alerts across your monitors
|
||||
- Alert status (Active/Inactive)
|
||||
- Alert type (STATUS, LATENCY, UPTIME)
|
||||
- Severity level (CRITICAL, WARNING)
|
||||
- Associated triggers
|
||||
- Quick actions to edit or view logs
|
||||
## Alert value by type {#alert-value-by-type}
|
||||
|
||||
## Creating an Alert Configuration {#creating-alert-configuration}
|
||||
| Alert type | Value format |
|
||||
| :--------- | :-------------------------------- |
|
||||
| `STATUS` | `DOWN` or `DEGRADED` |
|
||||
| `LATENCY` | latency threshold in milliseconds |
|
||||
| `UPTIME` | minimum uptime percentage |
|
||||
|
||||
Click **Create Alert** to create a new alert configuration.
|
||||
## Threshold behavior {#threshold-behavior}
|
||||
|
||||
### 1. Select Monitor {#select-monitor}
|
||||
- **Failure threshold**: consecutive failing evaluations before `TRIGGERED`
|
||||
- **Success threshold**: consecutive passing evaluations before `RESOLVED`
|
||||
|
||||
Choose which monitor this alert should watch. This cannot be changed after creation, so choose carefully.
|
||||
Higher thresholds reduce noise; lower thresholds detect faster.
|
||||
|
||||
### 2. Choose Alert Type {#choose-alert-type}
|
||||
## Severity and incidents {#severity-and-incidents}
|
||||
|
||||
Kener supports three types of alerts:
|
||||
- Severity is `CRITICAL` or `WARNING`.
|
||||
- Optional incident creation links alert events to incidents.
|
||||
|
||||
#### STATUS Alerts {#status-alerts}
|
||||
## Common patterns {#common-patterns}
|
||||
|
||||
Monitor the health status of your service.
|
||||
### Critical outage alert {#critical-outage-alert}
|
||||
|
||||
**Alert Values:**
|
||||
- Type: `STATUS`
|
||||
- Value: `DOWN`
|
||||
- Failure threshold: `1`
|
||||
- Success threshold: `2`
|
||||
- Severity: `CRITICAL`
|
||||
|
||||
- `DOWN` - Service is completely unavailable
|
||||
- `DEGRADED` - Service is partially unavailable or degraded
|
||||
### Performance alert {#performance-alert}
|
||||
|
||||
**Example Use Case:**
|
||||
Alert when a critical API endpoint goes down:
|
||||
|
||||
- Alert Type: STATUS
|
||||
- Alert Value: DOWN
|
||||
- Failure Threshold: 1 (immediate alert)
|
||||
- Success Threshold: 2 (ensure it's stable before resolving)
|
||||
|
||||
#### LATENCY Alerts {#latency-alerts}
|
||||
|
||||
Monitor response time and performance.
|
||||
|
||||
**Alert Value:** Threshold in milliseconds
|
||||
|
||||
**Example Use Case:**
|
||||
Alert when API response time exceeds 1000ms:
|
||||
|
||||
- Alert Type: LATENCY
|
||||
- Alert Value: 1000 (ms)
|
||||
- Failure Threshold: 3 (three consecutive slow responses)
|
||||
- Success Threshold: 5 (five fast responses to resolve)
|
||||
|
||||
#### UPTIME Alerts {#uptime-alerts}
|
||||
|
||||
Monitor overall availability over time.
|
||||
|
||||
**Alert Value:** Minimum uptime percentage (0-100)
|
||||
|
||||
**Example Use Case:**
|
||||
Alert when uptime falls below 99.9%:
|
||||
|
||||
- Alert Type: UPTIME
|
||||
- Alert Value: 99.9 (%)
|
||||
- Failure Threshold: 5 (check over 5 measurement periods)
|
||||
- Success Threshold: 3 (recover after 3 good periods)
|
||||
|
||||
### 3. Configure Thresholds {#configure-thresholds}
|
||||
|
||||
Thresholds prevent false alarms and ensure issues are real before alerting.
|
||||
|
||||
#### Failure Threshold {#failure-threshold}
|
||||
|
||||
Number of consecutive failing checks before triggering an alert.
|
||||
|
||||
**Recommended Values:**
|
||||
|
||||
- **1-2**: Critical services where any failure matters (production databases, payment systems)
|
||||
- **3-5**: Normal services where occasional failures are tolerable
|
||||
- **5+**: Services with known intermittent issues where you want to filter noise
|
||||
|
||||
#### Success Threshold {#success-threshold}
|
||||
|
||||
Number of consecutive successful checks before resolving an alert.
|
||||
|
||||
**Recommended Values:**
|
||||
|
||||
- **1-2**: Non-critical services where quick resolution is fine
|
||||
- **3-5**: Critical services where you want to ensure stability
|
||||
- **5+**: Services that have flaky recovery patterns
|
||||
|
||||
### 4. Set Severity {#set-severity}
|
||||
|
||||
Choose the severity level for this alert:
|
||||
|
||||
- **CRITICAL**: Immediate attention required, major impact
|
||||
- **WARNING**: Attention needed, minor or potential impact
|
||||
|
||||
Severity helps prioritize alerts and can be used in templates to customize notification appearance.
|
||||
|
||||
### 5. Incident Creation {#incident-creation}
|
||||
|
||||
**Create Incident:** YES / NO
|
||||
|
||||
When set to YES:
|
||||
|
||||
- An incident is automatically created when the alert triggers
|
||||
- The incident appears on your status page
|
||||
- Subscribed users receive notifications about the incident
|
||||
- The incident is linked to the alert in alert logs
|
||||
|
||||
When set to NO:
|
||||
|
||||
- Alert triggers but no incident is created
|
||||
- Good for internal monitoring that shouldn't appear on status page
|
||||
- Alert logs still track all events
|
||||
|
||||
### 6. Description (Optional) {#description}
|
||||
|
||||
Add a description to help your team understand:
|
||||
|
||||
- Why this alert exists
|
||||
- What the threshold means
|
||||
- What actions to take when it triggers
|
||||
- Known false positives or edge cases
|
||||
|
||||
The description is for internal use and doesn't appear in notifications.
|
||||
|
||||
### 7. Select Triggers {#select-triggers}
|
||||
|
||||
Choose which notification channels should receive alerts:
|
||||
|
||||
- Select one or more triggers (Discord, Slack, Email, Webhook)
|
||||
- You must create triggers first if none exist
|
||||
- All selected triggers will receive notifications on both TRIGGERED and RESOLVED events
|
||||
- Different alerts can use different triggers
|
||||
|
||||
**Tip:** Create a test trigger first to validate your alert configuration before using production channels.
|
||||
|
||||
### 8. Save Configuration {#save-configuration}
|
||||
|
||||
Click **Create Alert** to save your configuration.
|
||||
|
||||
- New alerts are active by default
|
||||
- You'll be redirected to the alert edit page where you can view the ID
|
||||
- The alert will start evaluating on the next monitor check
|
||||
|
||||
## Managing Alert Configurations {#managing-alert-configurations}
|
||||
|
||||
### Editing Alerts {#editing-alerts}
|
||||
|
||||
Click **Edit** on any alert to modify its configuration.
|
||||
|
||||
You can change:
|
||||
|
||||
- Alert type and value
|
||||
- Thresholds
|
||||
- Severity
|
||||
- Incident creation setting
|
||||
- Description
|
||||
- Associated triggers
|
||||
- Active status
|
||||
|
||||
**Note:** You cannot change the monitor after creation. Create a new alert if you need a different monitor.
|
||||
|
||||
### Activating/Deactivating Alerts {#activating-deactivating-alerts}
|
||||
|
||||
Use the toggle switch on each alert card to quickly activate or deactivate alerts without deleting them.
|
||||
|
||||
**When to Deactivate:**
|
||||
- During planned maintenance
|
||||
|
||||
- When testing a monitor
|
||||
- When temporarily not needed
|
||||
- To prevent alert fatigue during known issues
|
||||
|
||||
Deactivated alerts:
|
||||
|
||||
- Don't evaluate conditions
|
||||
- Don't send notifications
|
||||
- Retain all historical data and configuration
|
||||
- Can be reactivated instantly
|
||||
|
||||
### Viewing Alert Logs {#viewing-alert-logs}
|
||||
|
||||
Click **Logs** on any alert to view its alert event history.
|
||||
|
||||
Alert logs show:
|
||||
|
||||
- All TRIGGERED and RESOLVED events
|
||||
- Timestamp of each event
|
||||
- Associated incidents (if any)
|
||||
- Actions: Change status or delete events
|
||||
|
||||
**Filtering:**
|
||||
|
||||
- ALL: Show all alert events
|
||||
- TRIGGERED: Show only active alerts
|
||||
- RESOLVED: Show only resolved alerts
|
||||
|
||||
**Managing Alert Events:**
|
||||
- Click the status dropdown to manually change between TRIGGERED/RESOLVED
|
||||
|
||||
- Click the trash icon to delete an alert event
|
||||
- If the event has an associated incident, you can choose to also delete the incident
|
||||
|
||||
### Deleting Alert Configurations {#deleting-alert-configurations}
|
||||
|
||||
1. Open the alert configuration you want to delete
|
||||
2. Scroll to the **Danger Zone** section
|
||||
3. Click **Delete Alert**
|
||||
4. Confirm deletion
|
||||
|
||||
**Warning:** This action cannot be undone.
|
||||
|
||||
**What Gets Deleted:**
|
||||
|
||||
- The alert configuration
|
||||
- All associated alert events (cascading delete)
|
||||
- Scheduled evaluations
|
||||
|
||||
**What Doesn't Get Deleted:**
|
||||
|
||||
- The monitor
|
||||
- Associated triggers
|
||||
- Created incidents (they become orphaned but remain on status page)
|
||||
|
||||
## Filtering and Organization {#filtering-organization}
|
||||
|
||||
### Filter by Monitor {#filter-by-monitor}
|
||||
|
||||
Use the monitor dropdown to view alerts for a specific monitor. This helps when:
|
||||
|
||||
- Reviewing alerts for a critical service
|
||||
- Auditing alert coverage for a monitor
|
||||
- Troubleshooting alert behavior
|
||||
|
||||
### Pagination {#pagination}
|
||||
|
||||
Alert configurations are paginated (20 per page) for performance. Use the pagination controls to browse all your alerts.
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Start Conservative {#start-conservative}
|
||||
|
||||
Begin with higher thresholds (3-5 failures) to prevent alert fatigue. Lower thresholds after validating alert accuracy.
|
||||
|
||||
### Use Descriptive Names {#use-descriptive-names}
|
||||
|
||||
The alert is automatically named based on monitor + alert type, but add descriptions to explain the "why" behind each alert.
|
||||
|
||||
### Group Related Alerts {#group-related-alerts}
|
||||
|
||||
For critical services:
|
||||
|
||||
- STATUS alert with threshold 1 (immediate)
|
||||
- LATENCY alert with threshold 3 (performance degradation)
|
||||
- UPTIME alert with threshold 5 (trend monitoring)
|
||||
|
||||
### Severity Guidelines {#severity-guidelines}
|
||||
|
||||
**CRITICAL:**
|
||||
|
||||
- Production outages
|
||||
- Data loss risks
|
||||
- Security incidents
|
||||
- Revenue-impacting issues
|
||||
|
||||
**WARNING:**
|
||||
|
||||
- Performance degradation
|
||||
- Approaching limits
|
||||
- Non-critical service issues
|
||||
- Maintenance reminders
|
||||
|
||||
### Test Before Production {#test-before-production}
|
||||
|
||||
1. Create a test trigger (webhook to a test endpoint)
|
||||
2. Create your alert with the test trigger
|
||||
3. Verify it triggers correctly
|
||||
4. Replace test trigger with production triggers
|
||||
|
||||
### Document Actions {#document-actions}
|
||||
|
||||
In the description field, include:
|
||||
|
||||
```
|
||||
Purpose: Monitor API response time for checkout endpoint
|
||||
Threshold: 500ms indicates database query issues
|
||||
Action: Check DB slow query log, restart API if needed
|
||||
Escalation: Page on-call after 10 minutes
|
||||
Known Issues: Spikes during daily batch job (3-4am UTC)
|
||||
```
|
||||
|
||||
### Review Regularly {#review-regularly}
|
||||
|
||||
Quarterly review:
|
||||
|
||||
- Are alerts still relevant?
|
||||
- Are thresholds tuned correctly?
|
||||
- Are there false positives?
|
||||
- Are there gaps in coverage?
|
||||
- Are triggers up to date?
|
||||
|
||||
## Alert Configuration Examples {#alert-configuration-examples}
|
||||
|
||||
### Simple Status Alert {#simple-status-alert}
|
||||
|
||||
```
|
||||
Monitor: Production API
|
||||
Alert Type: STATUS
|
||||
Alert Value: DOWN
|
||||
Failure Threshold: 1
|
||||
Success Threshold: 2
|
||||
Severity: CRITICAL
|
||||
Create Incident: YES
|
||||
Triggers: PagerDuty, Slack #oncall
|
||||
```
|
||||
|
||||
### Performance Degradation {#performance-degradation}
|
||||
|
||||
```
|
||||
Monitor: Web Application
|
||||
Alert Type: LATENCY
|
||||
Alert Value: 2000
|
||||
Failure Threshold: 5
|
||||
Success Threshold: 10
|
||||
Severity: WARNING
|
||||
Create Incident: NO
|
||||
Triggers: Slack #performance
|
||||
```
|
||||
|
||||
### SLA Monitoring {#sla-monitoring}
|
||||
|
||||
```
|
||||
Monitor: Payment Gateway
|
||||
Alert Type: UPTIME
|
||||
Alert Value: 99.9
|
||||
Failure Threshold: 6
|
||||
Success Threshold: 12
|
||||
Severity: CRITICAL
|
||||
Create Incident: YES
|
||||
Triggers: Email (SRE Team), PagerDuty
|
||||
```
|
||||
- Type: `LATENCY`
|
||||
- Value: `1000`
|
||||
- Failure threshold: `3`
|
||||
- Success threshold: `5`
|
||||
- Severity: `WARNING`
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Alert Not Triggering {#alert-not-triggering}
|
||||
- **Not triggering**: verify configuration is active and conditions actually fail consecutively
|
||||
- **Not resolving**: verify success threshold and check for intermittent failures
|
||||
- **No notifications**: verify attached trigger is active and correctly configured
|
||||
|
||||
**Check:**
|
||||
## Related pages {#related-pages}
|
||||
|
||||
1. Is the alert active? (toggle switch on)
|
||||
2. Is the monitor running? (check monitor schedule)
|
||||
3. Are conditions actually met? (check monitor data)
|
||||
4. Are thresholds too high? (lower failure threshold temporarily to test)
|
||||
5. View alert logs to see evaluation history
|
||||
|
||||
### Too Many Alerts (False Positives) {#too-many-alerts}
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. Increase failure threshold
|
||||
2. Adjust alert value to be less sensitive
|
||||
3. Review if monitor is checking correctly
|
||||
4. Add description documenting known false positive patterns
|
||||
5. Temporarily deactivate during maintenance
|
||||
|
||||
### Alert Not Resolving {#alert-not-resolving}
|
||||
|
||||
**Check**:
|
||||
|
||||
1. Success threshold may be too high
|
||||
2. Monitor may still be failing intermittently
|
||||
3. Manually resolve from alert logs if needed
|
||||
4. Review success threshold setting
|
||||
|
||||
### Missing Notifications {#missing-notifications}
|
||||
|
||||
**Check:**
|
||||
|
||||
1. Are triggers configured and active?
|
||||
2. Check trigger logs for errors
|
||||
3. Verify trigger authentication/tokens
|
||||
4. Test trigger independently
|
||||
5. Check if trigger rate limits are hit
|
||||
|
||||
## Next Steps {#next-steps-configs}
|
||||
|
||||
- [Triggers](/docs/alerting/triggers) - Set up notification channels
|
||||
- [Templates](/docs/alerting/templates) - Customize notification messages
|
||||
- [Webhook Examples](/docs/alerting/webhook-examples) - See webhook integration examples
|
||||
- [Triggers](/docs/alerting/triggers)
|
||||
- [Templates](/docs/alerting/templates)
|
||||
- [Webhook Examples](/docs/alerting/webhook-examples)
|
||||
|
||||
@@ -1,118 +1,59 @@
|
||||
---
|
||||
title: "Alerting Overview"
|
||||
description: "Understand how Kener's alerting system works, including alert types, triggers, and the alert lifecycle."
|
||||
description: "How alerts are evaluated, triggered, and delivered in Kener"
|
||||
---
|
||||
|
||||
Kener's alerting system enables you to get notified when your monitors detect issues. The alerting system is designed to be flexible and powerful, allowing you to configure exactly when and how you want to be notified.
|
||||
Kener alerting turns monitor signals into notifications through configured triggers.
|
||||
|
||||
## How Alerting Works {#how-alerting-works}
|
||||
|
||||
The alerting flow in Kener follows this sequence:
|
||||
## Flow {#flow}
|
||||
|
||||
```
|
||||
Monitor → Alert Configuration → Condition Met → Alert Event → Trigger → Notification
|
||||
Monitor result -> Alert configuration evaluation -> Alert event -> Trigger delivery
|
||||
```
|
||||
|
||||
1. **Monitor** - Your monitor runs and collects data (status, latency, etc.)
|
||||
2. **Alert Configuration** - Defines the conditions that should trigger an alert
|
||||
3. **Condition Met** - The alert configuration thresholds are reached
|
||||
4. **Alert Event** - An alert record is created in the system
|
||||
5. **Trigger** - The notification channel (Discord, Slack, Email, Webhook, etc.)
|
||||
6. **Notification** - You receive the alert through your configured channel(s)
|
||||
## Alert types {#alert-types}
|
||||
|
||||
## Key Concepts {#key-concepts}
|
||||
- **STATUS**: evaluates monitor status (`DOWN` or `DEGRADED` targets)
|
||||
- **LATENCY**: evaluates latency threshold in milliseconds
|
||||
- **UPTIME**: evaluates uptime percentage threshold
|
||||
|
||||
### Alert Configurations {#alert-configurations}
|
||||
## Trigger lifecycle {#trigger-lifecycle}
|
||||
|
||||
Alert configurations are rules that define when alerts should be triggered. Each configuration includes:
|
||||
Each alert event is sent as:
|
||||
|
||||
- **Monitor** - Which monitor to watch
|
||||
- **Alert Type** - What metric to monitor (STATUS, LATENCY, or UPTIME)
|
||||
- **Thresholds** - How many consecutive failures/successes before triggering/resolving
|
||||
- **Severity** - CRITICAL or WARNING
|
||||
- **Triggers** - Which notification channels to use
|
||||
- `TRIGGERED` when failure threshold is reached
|
||||
- `RESOLVED` when success threshold is reached
|
||||
|
||||
### Alert Types {#alert-types}
|
||||
If incident creation is enabled on the configuration, Kener can also create/update incidents from alert events.
|
||||
|
||||
Kener supports three types of alerts:
|
||||
## Supported trigger providers {#supported-trigger-providers}
|
||||
|
||||
**STATUS Alerts**
|
||||
Monitor the status of your service (DOWN or DEGRADED). Use this when you want to be notified immediately when a service goes down.
|
||||
The current runtime supports:
|
||||
|
||||
**LATENCY Alerts**
|
||||
Monitor response time. Trigger alerts when latency exceeds a threshold (in milliseconds) for consecutive checks.
|
||||
- `webhook`
|
||||
- `discord`
|
||||
- `slack`
|
||||
- `email`
|
||||
|
||||
**UPTIME Alerts**
|
||||
Monitor overall uptime percentage. Trigger alerts when uptime falls below a specified percentage over a time period.
|
||||
## Template variable model {#template-variable-model}
|
||||
|
||||
### Triggers (Notification Channels) {#triggers}
|
||||
Trigger templates render with alert and site variables (for example `{{alert_name}}`, `{{alert_status}}`, `{{site_name}}`, `{{site_url}}`).
|
||||
|
||||
Triggers are the notification channels where alerts are sent. Kener supports:
|
||||
Use [Templates](/docs/alerting/templates) for the canonical variable list.
|
||||
|
||||
- **Webhook** - Send HTTP POST requests to custom endpoints
|
||||
- **Discord** - Send messages to Discord channels
|
||||
- **Slack** - Send messages to Slack channels
|
||||
- **Email** - Send emails via Resend or SMTP
|
||||
## Secret interpolation {#secret-interpolation}
|
||||
|
||||
Each trigger can be configured with:
|
||||
Secrets are interpolated from environment variables using `$VAR_NAME` syntax (not `{{env.VAR_NAME}}`).
|
||||
|
||||
- Custom templates using mustache syntax
|
||||
- Environment variables for secrets (tokens, API keys)
|
||||
- Headers and authentication
|
||||
Example:
|
||||
|
||||
### Alert Events {#alert-events}
|
||||
```
|
||||
Authorization: Bearer $API_TOKEN
|
||||
```
|
||||
|
||||
When an alert configuration's conditions are met, an alert event is created. Alert events have two states:
|
||||
## Next steps {#next-steps}
|
||||
|
||||
- **TRIGGERED** - The alert condition has been met
|
||||
- **RESOLVED** - The alert condition has been resolved (success threshold met)
|
||||
|
||||
You can view all alert events for a specific configuration in the alert logs page.
|
||||
|
||||
### Thresholds {#thresholds}
|
||||
|
||||
Thresholds control when alerts trigger and resolve:
|
||||
|
||||
**Failure Threshold**
|
||||
Number of consecutive failing checks before triggering an alert. This prevents false positives from temporary glitches.
|
||||
|
||||
**Success Threshold**
|
||||
Number of consecutive successful checks before resolving an alert. This ensures the issue is truly fixed before stopping notifications.
|
||||
|
||||
## When to Use Each Alert Type {#when-to-use-each-alert-type}
|
||||
|
||||
### Use STATUS Alerts When: {#use-status-alerts}
|
||||
|
||||
- You need immediate notification when a service goes down
|
||||
- You're monitoring critical services where any downtime matters
|
||||
- You want separate alerts for DEGRADED vs DOWN states
|
||||
|
||||
### Use LATENCY Alerts When: {#use-latency-alerts}
|
||||
|
||||
- Performance degradation is as important as outages
|
||||
- You have SLAs based on response times
|
||||
- You want to catch issues before they cause complete failures
|
||||
|
||||
### Use UPTIME Alerts When: {#use-uptime-alerts}
|
||||
|
||||
- You're monitoring overall reliability over time
|
||||
- You have uptime SLAs (e.g., 99.9% uptime)
|
||||
- You want to be notified about reliability trends
|
||||
|
||||
## Alert Lifecycle {#alert-lifecycle}
|
||||
|
||||
1. **Monitor Checks** - Your monitor runs on its schedule
|
||||
2. **Condition Evaluation** - Alert configurations check if their conditions are met
|
||||
3. **Threshold Counting** - Consecutive failures/successes are counted
|
||||
4. **Alert Triggered** - When failure threshold is reached, alert is TRIGGERED
|
||||
5. **Notification Sent** - All configured triggers send notifications
|
||||
6. **Incident Created** (Optional) - If enabled, an incident is automatically created
|
||||
7. **Alert Resolved** - When success threshold is reached, alert changes to RESOLVED
|
||||
8. **Resolution Notification** - Triggers send resolution notifications
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
- [Alert Configurations](/docs/alerting/alert-configurations) - Learn how to create and manage alert configurations
|
||||
- [Triggers](/docs/alerting/triggers) - Set up notification channels
|
||||
- [Templates](/docs/alerting/templates) - Customize notification templates
|
||||
- [Webhook Examples](/docs/alerting/webhook-examples) - See practical webhook examples
|
||||
- [Alert Configurations](/docs/alerting/alert-configurations)
|
||||
- [Triggers](/docs/alerting/triggers)
|
||||
- [Templates](/docs/alerting/templates)
|
||||
- [Webhook Examples](/docs/alerting/webhook-examples)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,353 +1,51 @@
|
||||
---
|
||||
title: "Alert Triggers"
|
||||
description: "Configure notification channels (triggers) including Webhook, Discord, Slack, and Email to receive alert notifications."
|
||||
description: "Configure Webhook, Discord, Slack, and Email delivery channels"
|
||||
---
|
||||
|
||||
Triggers are notification channels that receive alert notifications when your monitor alert configurations are triggered or resolved. Kener supports multiple trigger types including Webhook, Discord, Slack, and Email.
|
||||
Triggers are delivery channels used by alert configurations.
|
||||
|
||||
## What are Triggers? {#what-are-triggers}
|
||||
## Supported trigger types {#supported-trigger-types}
|
||||
|
||||
Triggers define **where** and **how** alert notifications are sent. Each trigger:
|
||||
The runtime currently supports:
|
||||
|
||||
- Has a unique name and type
|
||||
- Can be used by multiple alert configurations
|
||||
- Supports custom templates with mustache syntax
|
||||
- Can include environment variables for secure credential storage
|
||||
- Operates independently (one alert can notify multiple triggers)
|
||||
- `webhook`
|
||||
- `discord`
|
||||
- `slack`
|
||||
- `email`
|
||||
|
||||
## Accessing Triggers {#accessing-triggers}
|
||||
## Trigger meta by type {#trigger-meta-by-type}
|
||||
|
||||
Navigate to **Manage > Triggers** or visit `/manage/app/triggers` to view and manage all your notification channels.
|
||||
| Type | Key fields |
|
||||
| :-------- | :------------------------------------------ |
|
||||
| `webhook` | `url`, `headers`, `webhook_body` |
|
||||
| `discord` | `url`, `discord_body` |
|
||||
| `slack` | `url`, `slack_body` |
|
||||
| `email` | `to`, `from`, `email_subject`, `email_body` |
|
||||
|
||||
## Supported Trigger Types {#supported-trigger-types}
|
||||
> [!NOTE]
|
||||
> For webhook headers, use an array shape: `[{"key":"Authorization","value":"Bearer $TOKEN"}]`.
|
||||
|
||||
### Webhook {#webhook}
|
||||
## Template variables {#template-variables}
|
||||
|
||||
Send HTTP POST requests to any custom endpoint. Perfect for:
|
||||
Trigger bodies/subjects can use Mustache variables. Use the canonical list in [Templates](/docs/alerting/templates).
|
||||
|
||||
- Integrating with tools not natively supported
|
||||
- Custom notification services
|
||||
- Internal monitoring systems
|
||||
- Triggering automation workflows
|
||||
## Secret handling {#secret-handling}
|
||||
|
||||
**Required Configuration:**
|
||||
Use `$VAR_NAME` for runtime environment substitution in URL, headers, and body.
|
||||
|
||||
- URL: The endpoint to POST to
|
||||
- Headers: Optional custom headers (JSON format)
|
||||
- Body: Custom request body with mustache variables
|
||||
Examples:
|
||||
|
||||
See [Webhook Examples](/docs/alerting/webhook-examples) for practical implementations including Telegram, PagerDuty, and more.
|
||||
- URL: `https://hooks.example.com/$HOOK_ID`
|
||||
- Header value: `Bearer $API_TOKEN`
|
||||
- Body value: `"token": "$WEBHOOK_SECRET"`
|
||||
|
||||
### Discord {#discord}
|
||||
## Testing triggers {#testing-triggers}
|
||||
|
||||
Send rich messages to Discord channels via webhooks.
|
||||
Use the trigger test flow in the Manage UI to validate payload, auth, and destination behavior before attaching the trigger to production alerts.
|
||||
|
||||
**Required Configuration:**
|
||||
## Webhook provider examples {#webhook-provider-examples}
|
||||
|
||||
- Webhook URL: Get from Discord Channel Settings → Integrations → Webhooks
|
||||
Telegram, PagerDuty, Teams, and other provider payload examples are in:
|
||||
|
||||
**Features:**
|
||||
|
||||
- Rich embeds with colors
|
||||
- Customizable titles and descriptions
|
||||
- Automatic color coding (red for TRIGGERED, green for RESOLVED)
|
||||
- Username and avatar customization
|
||||
|
||||
### Slack {#slack}
|
||||
|
||||
Send messages to Slack channels or users via webhooks.
|
||||
|
||||
**Required Configuration:**
|
||||
|
||||
- Webhook URL: Create an incoming webhook at api.slack.com/apps
|
||||
|
||||
**Features:**
|
||||
|
||||
- Rich message blocks
|
||||
- Action buttons
|
||||
- Custom colors and emoji
|
||||
- Thread replies for updates
|
||||
|
||||
### Email {#email}
|
||||
|
||||
Send email notifications via Resend or SMTP.
|
||||
|
||||
**Required Configuration for Resend:**
|
||||
|
||||
- API Key: Your Resend API key (store in environment variable)
|
||||
- From Email: Verified sender email
|
||||
- To Email: Recipient email address
|
||||
|
||||
**Required Configuration for SMTP:**
|
||||
|
||||
- SMTP Host
|
||||
- SMTP Port
|
||||
- SMTP Username
|
||||
- SMTP Password
|
||||
- From Email
|
||||
- To Email
|
||||
|
||||
**Features:**
|
||||
|
||||
- HTML email templates
|
||||
- Plain text fallback
|
||||
- Custom subject lines
|
||||
- Attachment support (images in templates)
|
||||
|
||||
## Creating a Trigger {#creating-trigger}
|
||||
|
||||
1. Navigate to `/manage/app/triggers`
|
||||
2. Click **Create Trigger**
|
||||
3. Fill in the required fields:
|
||||
|
||||
### Basic Information {#basic-information}
|
||||
|
||||
**Name**
|
||||
A descriptive name for this trigger (e.g., "Production Alerts - Slack", "PagerDuty Critical")
|
||||
|
||||
**Type**
|
||||
Choose from Webhook, Discord, Slack, or Email
|
||||
|
||||
**Description** (Optional)
|
||||
Notes about this trigger's purpose, when to use it, or configuration details
|
||||
|
||||
## Trigger Variables (Mustache Syntax) {#trigger-variables}
|
||||
|
||||
All triggers support mustache templating with the following variables:
|
||||
|
||||
### Alert Variables
|
||||
|
||||
| Variable | Description | Example Value |
|
||||
| ----------------------------- | -------------------------------- | ---------------------------------------------------------------------- |
|
||||
| `{{alert_id}}` | Alert event ID | 42 |
|
||||
| `{{alert_name}}` | Auto-generated alert name | "Alert api-prod for STATUS DOWN TRIGGERED at 2024-01-15T10:30:00.000Z" |
|
||||
| `{{alert_for}}` | Alert type | "STATUS", "LATENCY", or "UPTIME" |
|
||||
| `{{alert_value}}` | Alert threshold value | "DOWN", "1000", or "99.9" |
|
||||
| `{{alert_status}}` | Current alert status | "TRIGGERED" or "RESOLVED" |
|
||||
| `{{alert_severity}}` | Alert severity level | "CRITICAL" or "WARNING" |
|
||||
| `{{alert_message}}` | Alert description from config | Custom message text |
|
||||
| `{{alert_source}}` | Source of alert | "ALERT" |
|
||||
| `{{alert_timestamp}}` | ISO 8601 timestamp | "2024-01-15T10:30:00.000Z" |
|
||||
| `{{alert_cta_url}}` | Call-to-action URL | "https://kener.ing/docs/home" |
|
||||
| `{{alert_cta_text}}` | Call-to-action button text | "View Documentation" |
|
||||
| `{{alert_incident_id}}` | Associated incident ID (if any) | 123 or null |
|
||||
| `{{alert_incident_url}}` | Associated incident URL (if any) | "https://kener.ing/incidents/123" or null |
|
||||
| `{{alert_failure_threshold}}` | Failure threshold setting | 3 |
|
||||
| `{{alert_success_threshold}}` | Success threshold setting | 5 |
|
||||
| `{{is_resolved}}` | Boolean if alert is resolved | true/false |
|
||||
| `{{is_triggered}}` | Boolean if alert is triggered | true/false |
|
||||
|
||||
### Site Variables
|
||||
|
||||
| Variable | Description | Example Value |
|
||||
| ------------------------ | ---------------------------- | ------------------------------ |
|
||||
| `{{site_url}}` | Your Kener site URL | "https://status.example.com" |
|
||||
| `{{site_name}}` | Your Kener site name | "My Status Page" |
|
||||
| `{{site_logo_url}}` | Your site logo URL | "https://example.com/logo.png" |
|
||||
| `{{colors_up}}` | Color for UP status | "#10b981" |
|
||||
| `{{colors_down}}` | Color for DOWN status | "#ef4444" |
|
||||
| `{{colors_degraded}}` | Color for DEGRADED status | "#f59e0b" |
|
||||
| `{{colors_maintenance}}` | Color for MAINTENANCE status | "#6b7280" |
|
||||
|
||||
### Conditional Rendering {#conditional-rendering}
|
||||
|
||||
Use mustache sections for conditional content:
|
||||
|
||||
```json
|
||||
{
|
||||
"color": "{{#is_triggered}}16711680{{/is_triggered}}{{#is_resolved}}65280{{/is_resolved}}",
|
||||
"message": "{{#is_triggered}}⚠️ Alert Triggered{{/is_triggered}}{{#is_resolved}}✅ Alert Resolved{{/is_resolved}}"
|
||||
}
|
||||
```
|
||||
|
||||
### Environment Variables {#environment-variables}
|
||||
|
||||
Reference environment variables using `$VARIABLE_NAME`:
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Bearer $API_TOKEN",
|
||||
"X-API-Key": "$WEBHOOK_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
**Setting Environment Variables:**
|
||||
|
||||
```bash
|
||||
# In .env file
|
||||
API_TOKEN=your_token_here
|
||||
WEBHOOK_SECRET=your_secret_here
|
||||
```
|
||||
|
||||
**Important:** Environment variables can be used anywhere in your template - in URLs, headers, or body. For example:
|
||||
|
||||
```
|
||||
URL: https://hooks.slack.com/services/$SLACK_WEBHOOK_TOKEN
|
||||
```
|
||||
|
||||
## Default Templates {#default-templates}
|
||||
|
||||
Kener provides default templates for each trigger type. You can customize these or create your own.
|
||||
|
||||
### Default Webhook Body {#default-webhook-body}
|
||||
|
||||
See the [Templates documentation](/docs/alerting/templates#webhook-template) for the complete default webhook body template with mustache variables.
|
||||
|
||||
### Default Discord Body {#default-discord-body}
|
||||
|
||||
See the [Templates documentation](/docs/alerting/templates#discord-template) for the complete default Discord message template with embeds.
|
||||
|
||||
### Default Slack Body {#default-slack-body}
|
||||
|
||||
See the [Templates documentation](/docs/alerting/templates#slack-template) for the complete default Slack message template with blocks and attachments.
|
||||
|
||||
### Default Email Template {#default-email-template}
|
||||
|
||||
**Subject:**
|
||||
|
||||
```
|
||||
{{alert_name}}
|
||||
```
|
||||
|
||||
**Body (HTML):**
|
||||
See the [Templates documentation](/docs/alerting/templates#email-template) for the complete HTML email template.
|
||||
|
||||
## Managing Triggers {#managing-triggers}
|
||||
|
||||
### Editing Triggers {#editing-triggers}
|
||||
|
||||
1. Click on a trigger in the list
|
||||
2. Modify any configuration
|
||||
3. Click **Save Changes**
|
||||
|
||||
### Testing Triggers {#testing-triggers}
|
||||
|
||||
Before using a trigger in production:
|
||||
|
||||
1. Create a test alert configuration
|
||||
2. Use low thresholds to trigger quickly
|
||||
3. Manually trigger or wait for conditions
|
||||
4. Verify notifications are received correctly
|
||||
5. Check formatting and variable substitution
|
||||
|
||||
### Activating/Deactivating {#activating-deactivating}
|
||||
|
||||
Use the toggle switch to quickly enable/disable triggers without deleting them.
|
||||
|
||||
**Inactive triggers:**
|
||||
|
||||
- Don't receive alert notifications
|
||||
- Retain all configuration
|
||||
- Can be reactivated instantly
|
||||
|
||||
### Deleting Triggers {#deleting-triggers}
|
||||
|
||||
**Warning:** Deleting a trigger will remove it from all alert configurations using it.
|
||||
|
||||
1. Open the trigger
|
||||
2. Scroll to **Danger Zone**
|
||||
3. Click **Delete Trigger**
|
||||
4. Confirm deletion
|
||||
|
||||
## Best Practices {#best-practices-triggers}
|
||||
|
||||
### Use Environment Variables for Secrets {#use-environment-variables}
|
||||
|
||||
Never hard-code API keys or tokens in trigger configurations:
|
||||
|
||||
**Bad:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Bearer sk_live_abc123xyz"
|
||||
}
|
||||
```
|
||||
|
||||
**Good:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Bearer $API_TOKEN"
|
||||
}
|
||||
```
|
||||
|
||||
### Create Separate Triggers for Different Purposes {#create-separate-triggers}
|
||||
|
||||
Instead of one "Slack" trigger, create:
|
||||
|
||||
- "Slack - Critical Alerts" → #oncall channel
|
||||
- "Slack - Warnings" → #monitoring channel
|
||||
- "Slack - Test" → your personal DM
|
||||
|
||||
### Test Before Production {#test-before-production-triggers}
|
||||
|
||||
Always test your triggers with non-critical alerts first.
|
||||
|
||||
### Document Trigger Purpose {#document-trigger-purpose}
|
||||
|
||||
Use the description field to document:
|
||||
|
||||
- What this trigger is for
|
||||
- Who monitors it
|
||||
- When it should be used
|
||||
- Any special configuration notes
|
||||
|
||||
### Monitor Trigger Failures {#monitor-trigger-failures}
|
||||
|
||||
If a trigger consistently fails:
|
||||
|
||||
- Check the trigger logs
|
||||
- Verify credentials/tokens
|
||||
- Check for API rate limits
|
||||
- Validate JSON syntax
|
||||
- Test the endpoint independently
|
||||
|
||||
### Keep Templates Simple {#keep-templates-simple}
|
||||
|
||||
Start with default templates and only customize if needed. Complex templates are harder to debug.
|
||||
|
||||
## Troubleshooting {#troubleshooting-triggers}
|
||||
|
||||
### Trigger Not Sending Notifications {#trigger-not-sending}
|
||||
|
||||
**Check:**
|
||||
|
||||
1. Is trigger active?
|
||||
2. Is trigger selected in alert configuration?
|
||||
3. Are credentials valid?
|
||||
4. Check trigger logs for errors
|
||||
5. Test endpoint independently (curl/Postman)
|
||||
|
||||
### Invalid JSON Error {#invalid-json-error}
|
||||
|
||||
- Validate JSON syntax (use JSONLint.com)
|
||||
- Check for unquoted strings
|
||||
- Verify comma placement
|
||||
- Ensure proper brace matching
|
||||
|
||||
### Variables Not Substituting {#variables-not-substituting}
|
||||
|
||||
- Check variable spelling (case-sensitive)
|
||||
- Ensure proper mustache syntax `{{variable}}`
|
||||
- Verify the variable is available (see variable table above)
|
||||
- Check for extra spaces inside braces
|
||||
|
||||
### Rate Limiting {#rate-limiting}
|
||||
|
||||
Some services rate limit webhooks:
|
||||
|
||||
- Discord: 30 requests per 60 seconds per webhook
|
||||
- Slack: ~1 request per second average
|
||||
- Custom APIs: Check their documentation
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Increase alert thresholds to reduce frequency
|
||||
- Use separate webhooks for different channels
|
||||
- Implement retry logic in custom endpoints
|
||||
|
||||
## Next Steps {#next-steps-triggers}
|
||||
|
||||
- [Templates](/docs/alerting/templates) - Customize notification templates
|
||||
- [Webhook Examples](/docs/alerting/webhook-examples) - See practical webhook examples
|
||||
- [Alert Configurations](/docs/alerting/alert-configurations) - Create alerts using triggers
|
||||
- [Alerting Trigger Examples Guide](/docs/v4/guides/alerting-trigger-examples)
|
||||
|
||||
@@ -1,755 +1,20 @@
|
||||
---
|
||||
title: "Webhook Examples"
|
||||
description: "Practical webhook integration examples for Telegram, PagerDuty, Microsoft Teams, and other popular services."
|
||||
description: "Index page for alerting webhook examples now maintained under Guides"
|
||||
---
|
||||
|
||||
Webhooks allow you to integrate Kener with virtually any service by sending HTTP POST requests with alert data. This guide provides practical examples for popular services and custom integrations.
|
||||
Webhook provider examples are now maintained in Guides.
|
||||
|
||||
## Overview {#overview}
|
||||
## Moved examples {#moved-examples}
|
||||
|
||||
Kener's webhook trigger sends POST requests to your specified URL with:
|
||||
Use:
|
||||
|
||||
- Custom headers (optional)
|
||||
- JSON body with alert data
|
||||
- Mustache variable substitution
|
||||
- Environment variable support for secrets
|
||||
- [Alerting Trigger Examples Guide](/docs/v4/guides/alerting-trigger-examples)
|
||||
|
||||
All examples use environment variables for sensitive credentials. Never hard-code API keys or tokens directly in your webhook configuration.
|
||||
This guide includes Telegram and other common webhook integrations with runtime-compatible variables and `$ENV_VAR` secret usage.
|
||||
|
||||
## Example 1: Telegram {#example-telegram}
|
||||
## Quick reminder {#quick-reminder}
|
||||
|
||||
Send alert notifications to a Telegram chat using the Telegram Bot API.
|
||||
|
||||
### Step 1: Create a Telegram Bot {#telegram-create-bot}
|
||||
|
||||
1. Open Telegram and search for **@BotFather**
|
||||
2. Send `/newbot` command
|
||||
3. Follow prompts to set bot name and username
|
||||
4. **Copy the bot token** (looks like `110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsaw`)
|
||||
|
||||
### Step 2: Get Your Chat ID {#telegram-get-chat-id}
|
||||
|
||||
**Option A: Using @userinfobot**
|
||||
|
||||
1. Search for **@userinfobot** in Telegram
|
||||
2. Start a chat with it
|
||||
3. It will reply with your user ID (this is your chat ID)
|
||||
|
||||
**Option B: Using getUpdates API**
|
||||
|
||||
1. Send a message to your bot
|
||||
2. Visit: `https://api.telegram.org/bot<YOUR_BOT_TOKEN>/getUpdates`
|
||||
3. Look for `"chat":{"id":123456789}` in the response
|
||||
4. Use this ID as your chat ID
|
||||
|
||||
**For Groups:**
|
||||
|
||||
1. Add your bot to the group
|
||||
2. Send a message in the group
|
||||
3. Visit the getUpdates URL above
|
||||
4. Look for the negative chat ID (like `-987654321`)
|
||||
|
||||
### Step 3: Configure Environment Variables {#telegram-env-variables}
|
||||
|
||||
Add to your `.env` file:
|
||||
|
||||
```bash
|
||||
TELEGRAM_BOT_TOKEN=110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsaw
|
||||
TELEGRAM_CHAT_ID=123456789
|
||||
```
|
||||
|
||||
### Step 4: Create Webhook Trigger in Kener {#telegram-create-trigger}
|
||||
|
||||
Navigate to `/manage/app/triggers` and create a new Webhook trigger:
|
||||
|
||||
**Name:** `Telegram Notifications`
|
||||
|
||||
**URL:** `https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage`
|
||||
|
||||
**Headers:** (Leave empty or set to `{"Content-Type": "application/json"}`)
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_id": "$TELEGRAM_CHAT_ID",
|
||||
"text": "{{#is_triggered}}🚨 *ALERT TRIGGERED*{{/is_triggered}}{{#is_resolved}}✅ *ALERT RESOLVED*{{/is_resolved}}\n\n*{{alert_name}}*\n\n⚠️ *Severity:* {{alert_severity}}\n📈 *Status:* {{alert_status}}\n📋 *Type:* {{alert_for}}\n📊 *Value:* {{alert_value}}\n\n{{alert_message}}\n\n*Thresholds:*\nFailure: {{alert_failure_threshold}}\nSuccess: {{alert_success_threshold}}\nTime: {{alert_timestamp}}\n\n[{{alert_cta_text}}]({{alert_cta_url}})",
|
||||
"parse_mode": "Markdown",
|
||||
"disable_web_page_preview": true
|
||||
}
|
||||
```
|
||||
|
||||
### Enhanced Telegram with Buttons {#telegram-with-buttons}
|
||||
|
||||
For inline keyboard buttons:
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_id": "$TELEGRAM_CHAT_ID",
|
||||
"text": "{{#is_triggered}}🚨 *ALERT TRIGGERED*{{/is_triggered}}{{#is_resolved}}✅ *ALERT RESOLVED*{{/is_resolved}}\n\n*{{alert_name}}*\n\n⚠️ *Severity:* {{alert_severity}}\n📈 *Status:* {{alert_status}}\n\n{{alert_message}}",
|
||||
"parse_mode": "Markdown",
|
||||
"reply_markup": {
|
||||
"inline_keyboard": [
|
||||
[
|
||||
{
|
||||
"text": "🔍 {{alert_cta_text}}",
|
||||
"url": "{{alert_cta_url}}"
|
||||
},
|
||||
{
|
||||
"text": "📊 Status Page",
|
||||
"url": "{{site_url}}"
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Telegram with HTML Formatting {#telegram-html-formatting}
|
||||
|
||||
Using HTML instead of Markdown:
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_id": "$TELEGRAM_CHAT_ID",
|
||||
"text": "{{#is_triggered}}🚨 <b>ALERT TRIGGERED</b>{{/is_triggered}}{{#is_resolved}}✅ <b>ALERT RESOLVED</b>{{/is_resolved}}\n\n<b>{{alert_name}}</b>\n\n<b>Severity:</b> {{alert_severity}}\n<b>Status:</b> {{alert_status}}\n<b>Type:</b> {{alert_for}}\n\n{{alert_message}}\n\n<b>Failure Threshold:</b> {{alert_failure_threshold}}\n<b>Success Threshold:</b> {{alert_success_threshold}}\n<b>Time:</b> {{alert_timestamp}}\n\n<a href=\"{{alert_cta_url}}\">{{alert_cta_text}}</a>",
|
||||
"parse_mode": "HTML",
|
||||
"disable_web_page_preview": true
|
||||
}
|
||||
```
|
||||
|
||||
## Example 2: PagerDuty {#example-pagerduty}
|
||||
|
||||
Integrate with PagerDuty Events API v2 for incident management.
|
||||
|
||||
### Step 1: Get Integration Key {#pagerduty-get-key}
|
||||
|
||||
1. In PagerDuty, go to **Services**
|
||||
2. Select your service or create a new one
|
||||
3. Go to **Integrations** tab
|
||||
4. Add integration → **Events API V2**
|
||||
5. **Copy the Integration Key**
|
||||
|
||||
### Step 2: Configure Environment Variables {#pagerduty-env-variables}
|
||||
|
||||
```bash
|
||||
PAGERDUTY_INTEGRATION_KEY=abcd1234efgh5678ijkl
|
||||
```
|
||||
|
||||
### Step 3: Create Webhook Trigger {#pagerduty-create-trigger}
|
||||
|
||||
**Name:** `PagerDuty Critical Alerts`
|
||||
|
||||
**URL:** `https://events.pagerduty.com/v2/enqueue`
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"routing_key": "$PAGERDUTY_INTEGRATION_KEY",
|
||||
"event_action": "{{#is_triggered}}trigger{{/is_triggered}}{{#is_resolved}}resolve{{/is_resolved}}",
|
||||
"dedup_key": "kener-{{alert_id}}",
|
||||
"payload": {
|
||||
"summary": "{{alert_name}} - {{alert_status}}",
|
||||
"source": "{{alert_source}}",
|
||||
"severity": "{{#alert_severity}}critical{{/alert_severity}}",
|
||||
"timestamp": "{{alert_timestamp}}",
|
||||
"component": "{{alert_for}}",
|
||||
"group": "monitoring",
|
||||
"class": "{{alert_for}}",
|
||||
"custom_details": {
|
||||
"alert_name": "{{alert_name}}",
|
||||
"alert_type": "{{alert_for}}",
|
||||
"alert_value": "{{alert_value}}",
|
||||
"failure_threshold": "{{alert_failure_threshold}}",
|
||||
"success_threshold": "{{alert_success_threshold}}",
|
||||
"status": "{{alert_status}}",
|
||||
"message": "{{alert_message}}",
|
||||
"alert_url": "{{alert_cta_url}}"
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"href": "{{alert_cta_url}}",
|
||||
"text": "View in Kener"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Notes:**
|
||||
|
||||
- `event_action`: "trigger" creates an incident, "resolve" closes it
|
||||
- `dedup_key`: Groups related events (uses Kener alert ID)
|
||||
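- `severity`: PagerDuty accepts critical, error, warning, or info. The example body above always sends `critical`, because the `{{#alert_severity}}…{{/alert_severity}}` section renders its content for any non-empty severity; adjust the template if WARNING alerts should map to a lower PagerDuty severity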
|
||||
- PagerDuty automatically handles acknowledgments and escalations
|
||||
|
||||
## Example 3: Microsoft Teams {#example-teams}
|
||||
|
||||
Send alerts to Microsoft Teams channels via incoming webhooks.
|
||||
|
||||
### Step 1: Create Incoming Webhook {#teams-create-webhook}
|
||||
|
||||
1. In Teams, go to your channel
|
||||
2. Click **•••** → **Connectors** → **Incoming Webhook**
|
||||
3. Configure and **copy the webhook URL**
|
||||
|
||||
### Step 2: Configure Environment Variables {#teams-env-variables}
|
||||
|
||||
```bash
|
||||
TEAMS_WEBHOOK_URL=https://outlook.office.com/webhook/abc123.../IncomingWebhook/xyz789...
|
||||
```
|
||||
|
||||
### Step 3: Create Webhook Trigger {#teams-create-trigger}
|
||||
|
||||
**Name:** `Microsoft Teams Alerts`
|
||||
|
||||
**URL:** `$TEAMS_WEBHOOK_URL`
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"@type": "MessageCard",
|
||||
"@context": "http://schema.org/extensions",
|
||||
"themeColor": "{{#is_triggered}}D13438{{/is_triggered}}{{#is_resolved}}28A745{{/is_resolved}}",
|
||||
"summary": "{{alert_name}}",
|
||||
"sections": [
|
||||
{
|
||||
"activityTitle": "{{#is_triggered}}🚨 Alert Triggered{{/is_triggered}}{{#is_resolved}}✅ Alert Resolved{{/is_resolved}}",
|
||||
"activitySubtitle": "{{alert_name}}",
|
||||
"facts": [
|
||||
{
|
||||
"name": "Monitor:",
|
||||
"value": "{{alert_for}}"
|
||||
},
|
||||
{
|
||||
"name": "Status:",
|
||||
"value": "{{alert_status}}"
|
||||
},
|
||||
{
|
||||
"name": "Severity:",
|
||||
"value": "{{alert_severity}}"
|
||||
},
|
||||
{
|
||||
"name": "Current Value:",
|
||||
"value": "{{alert_value}}"
|
||||
},
|
||||
{
|
||||
"name": "Threshold:",
|
||||
"value": "{{alert_failure_threshold}}"
|
||||
},
|
||||
{
|
||||
"name": "Time:",
|
||||
"value": "{{alert_timestamp}}"
|
||||
}
|
||||
],
|
||||
"markdown": true
|
||||
}
|
||||
],
|
||||
"potentialAction": [
|
||||
{
|
||||
"@type": "OpenUri",
|
||||
"name": "View Monitor",
|
||||
"targets": [
|
||||
{
|
||||
"os": "default",
|
||||
"uri": "{{alert_cta_url}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Example 4: Opsgenie {#example-opsgenie}
|
||||
|
||||
Send alerts to Opsgenie for on-call management.
|
||||
|
||||
### Setup {#opsgenie-setup}
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
```bash
|
||||
OPSGENIE_API_KEY=your-api-key-here
|
||||
```
|
||||
|
||||
**URL:** `https://api.opsgenie.com/v2/alerts`
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "GenieKey $OPSGENIE_API_KEY",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "{{alert_name}}",
|
||||
"alias": "kener-alert-{{alert_id}}",
|
||||
"description": "{{alert_message}}\n\nMonitor: {{alert_for}}\nCurrent: {{alert_value}}\nThreshold: {{alert_failure_threshold}}",
|
||||
"priority": "{{#alert_severity}}P1{{/alert_severity}}",
|
||||
"source": "{{alert_source}}",
|
||||
"entity": "{{alert_for}}",
|
||||
"tags": ["kener", "{{alert_severity}}", "{{alert_for}}"],
|
||||
"details": {
|
||||
"alert_name": "{{alert_name}}",
|
||||
"monitor": "{{alert_for}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"current_value": "{{alert_value}}",
|
||||
"threshold": "{{alert_failure_threshold}}",
|
||||
"timestamp": "{{alert_timestamp}}",
|
||||
"alert_url": "{{alert_cta_url}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Example 5: Mattermost {#example-mattermost}
|
||||
|
||||
Send alerts to Mattermost channels.
|
||||
|
||||
### Setup {#mattermost-setup}
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
```bash
|
||||
MATTERMOST_WEBHOOK_URL=https://your-mattermost-domain/hooks/xxx-webhook-id-xxx
|
||||
```
|
||||
|
||||
**URL:** `$MATTERMOST_WEBHOOK_URL`
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"username": "Kener Alerts",
|
||||
"icon_url": "{{site_logo_url}}",
|
||||
"text": "### {{#is_triggered}}🚨 Alert Triggered{{/is_triggered}}{{#is_resolved}}✅ Alert Resolved{{/is_resolved}}\n\n**{{alert_name}}**\n\n**Monitor:** {{alert_for}}\n**Status:** {{alert_status}}\n**Severity:** {{alert_severity}}\n\n{{alert_message}}\n\n**Current Value:** {{alert_value}}\n**Threshold:** {{alert_failure_threshold}}\n**Time:** {{alert_timestamp}}\n\n[View Monitor]({{alert_cta_url}})"
|
||||
}
|
||||
```
|
||||
|
||||
## Example 6: Custom Logging Service {#example-custom-logging-service}
|
||||
|
||||
Send alerts to a custom HTTP endpoint for logging or processing.
|
||||
|
||||
### Simple JSON Webhook {#custom-logging-simple}
|
||||
|
||||
**URL:** `https://your-api.com/alerts`
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Bearer $CUSTOM_API_TOKEN",
|
||||
"Content-Type": "application/json",
|
||||
"X-Service": "kener"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"timestamp": "{{alert_timestamp}}",
|
||||
"event_type": "alert",
|
||||
"event_id": "{{alert_id}}",
|
||||
"alert": {
|
||||
"name": "{{alert_name}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"is_active": {{#is_triggered}}true{{/is_triggered}}{{#is_resolved}}false{{/is_resolved}}
|
||||
},
|
||||
"monitor": {
|
||||
"name": "{{alert_for}}",
|
||||
"current_value": "{{alert_value}}",
|
||||
"threshold": "{{alert_failure_threshold}}"
|
||||
},
|
||||
"metadata": {
|
||||
"source": "{{alert_source}}",
|
||||
"description": "{{alert_message}}",
|
||||
"action_url": "{{alert_cta_url}}",
|
||||
"site_url": "{{site_url}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Example 7: Splunk HTTP Event Collector {#example-splunk}
|
||||
|
||||
Send alerts to Splunk for analysis and monitoring.
|
||||
|
||||
### Setup {#splunk-setup}
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
```bash
|
||||
SPLUNK_HEC_TOKEN=your-hec-token-here
|
||||
SPLUNK_HEC_URL=https://splunk.example.com:8088/services/collector/event
|
||||
```
|
||||
|
||||
**URL:** `$SPLUNK_HEC_URL`
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Splunk $SPLUNK_HEC_TOKEN",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"sourcetype": "kener:alert",
|
||||
"source": "{{alert_source}}",
|
||||
"event": {
|
||||
"alert_id": "{{alert_id}}",
|
||||
"alert_name": "{{alert_name}}",
|
||||
"monitor": "{{alert_for}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"is_triggered": {{#is_triggered}}true{{/is_triggered}}{{#is_resolved}}false{{/is_resolved}},
|
||||
"is_resolved": {{#is_resolved}}true{{/is_resolved}}{{#is_triggered}}false{{/is_triggered}},
|
||||
"current_value": "{{alert_value}}",
|
||||
"threshold": "{{alert_failure_threshold}}",
|
||||
"description": "{{alert_message}}",
|
||||
"action_url": "{{alert_cta_url}}",
|
||||
"timestamp": "{{alert_timestamp}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Example 8: Datadog Events API {#example-datadog}
|
||||
|
||||
Send alerts as events to Datadog.
|
||||
|
||||
### Setup {#datadog-setup}
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
```bash
|
||||
DATADOG_API_KEY=your-api-key
|
||||
DATADOG_SITE=datadoghq.com
|
||||
```
|
||||
|
||||
**URL:** `https://api.$DATADOG_SITE/api/v1/events`
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"DD-API-KEY": "$DATADOG_API_KEY",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "{{alert_name}}",
|
||||
"text": "{{alert_message}}\n\n**Monitor:** {{alert_for}}\n**Current:** {{alert_value}}\n**Threshold:** {{alert_failure_threshold}}",
|
||||
"alert_type": "{{#is_triggered}}error{{/is_triggered}}{{#is_resolved}}success{{/is_resolved}}",
|
||||
"priority": "{{#alert_severity}}normal{{/alert_severity}}",
|
||||
"tags": ["source:kener", "monitor:{{alert_for}}", "severity:{{alert_severity}}", "alert_type:{{alert_for}}"],
|
||||
"aggregation_key": "kener-alert-{{alert_id}}",
|
||||
"source_type_name": "kener"
|
||||
}
|
||||
```
|
||||
|
||||
## Example 9: Webhook with Retry Logic {#example-webhook-retry}
|
||||
|
||||
For critical integrations, implement a webhook that signals when to retry.
|
||||
|
||||
### Webhook with Idempotency {#webhook-retry-idempotency}
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Content-Type": "application/json",
|
||||
"X-Idempotency-Key": "kener-{{alert_id}}-{{timestamp_unix}}",
|
||||
"X-Webhook-Signature": "$WEBHOOK_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"idempotency_key": "kener-{{alert_id}}-{{timestamp_unix}}",
|
||||
"event": {
|
||||
"type": "alert.{{#is_triggered}}triggered{{/is_triggered}}{{#is_resolved}}resolved{{/is_resolved}}",
|
||||
"id": "{{alert_id}}",
|
||||
"timestamp": {{timestamp_unix}},
|
||||
"data": {
|
||||
"alert_name": "{{alert_name}}",
|
||||
"monitor": "{{alert_for}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"current_value": "{{alert_value}}",
|
||||
"threshold": "{{alert_failure_threshold}}",
|
||||
"url": "{{alert_cta_url}}"
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"source": "{{alert_source}}",
|
||||
"site": "{{site_url}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Example 10: Conditional Webhooks by Severity {#example-conditional-webhooks}
|
||||
|
||||
Send to different endpoints based on alert severity.
|
||||
|
||||
### Critical Alerts to PagerDuty {#conditional-webhooks-critical}
|
||||
|
||||
Create one trigger for critical alerts:
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"routing_key": "$PAGERDUTY_CRITICAL_KEY",
|
||||
"event_action": "trigger",
|
||||
"payload": {
|
||||
"summary": "[CRITICAL] {{alert_name}}",
|
||||
"severity": "critical",
|
||||
"source": "{{alert_for}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Use this trigger **only** for alert configurations with `CRITICAL` severity.
|
||||
|
||||
### Warning Alerts to Slack {#conditional-webhooks-warning}
|
||||
|
||||
Create another trigger for warnings:
|
||||
|
||||
**URL:** `$SLACK_WARNINGS_WEBHOOK`
|
||||
|
||||
**Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "⚠️ Warning: {{alert_name}}",
|
||||
"blocks": [...]
|
||||
}
|
||||
```
|
||||
|
||||
Use this trigger **only** for alert configurations with `WARNING` severity.
|
||||
|
||||
## Testing Your Webhooks {#testing-webhooks}
|
||||
|
||||
### 1. Test Endpoint Independently {#testing-endpoint}
|
||||
|
||||
Before configuring in Kener, test your endpoint:
|
||||
|
||||
```bash
|
||||
curl -X POST https://api.telegram.org/bot<TOKEN>/sendMessage \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"chat_id": "123456789",
|
||||
"text": "Test message"
|
||||
}'
|
||||
```
|
||||
|
||||
### 2. Use Webhook Testing Tools {#testing-tools}
|
||||
|
||||
- [webhook.site](https://webhook.site) - Inspect webhook payloads
|
||||
- [RequestBin](https://requestbin.com) - Debug webhook requests
|
||||
- [ngrok](https://ngrok.com) - Tunnel to local development server
|
||||
|
||||
### 3. Create Test Alert {#testing-test-alert}
|
||||
|
||||
1. Create a low-threshold alert (e.g., latency > 1ms)
|
||||
2. Attach your webhook trigger
|
||||
3. Trigger the alert
|
||||
4. Verify the webhook fires correctly
|
||||
|
||||
### 4. Check Logs {#testing-logs}
|
||||
|
||||
Review Kener logs for webhook errors:
|
||||
|
||||
- Connection failures
|
||||
- Authentication errors
|
||||
- Rate limiting
|
||||
- Invalid JSON responses
|
||||
|
||||
## Common Patterns {#common-patterns}
|
||||
|
||||
### Pattern 1: Multiple Recipients {#pattern-multiple-recipients}
|
||||
|
||||
Send the same alert to multiple channels:
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_ids": ["$TEAM_CHAT_ID", "$ONCALL_CHAT_ID"],
|
||||
"message": "{{alert_name}}"
|
||||
}
|
||||
```
|
||||
|
||||
Or create separate triggers for each recipient.
|
||||
|
||||
### Pattern 2: Severity-Based Routing {#pattern-severity-routing}
|
||||
|
||||
In your webhook body, include routing information:
|
||||
|
||||
```json
|
||||
{
|
||||
"destination": "{{#alert_severity}}critical-channel{{/alert_severity}}",
|
||||
"priority": "{{#is_triggered}}high{{/is_triggered}}{{#is_resolved}}low{{/is_resolved}}",
|
||||
"message": "{{alert_name}}"
|
||||
}
|
||||
```
|
||||
|
||||
### Pattern 3: Rich Metadata {#pattern-rich-metadata}
|
||||
|
||||
Include all available context for downstream processing:
|
||||
|
||||
```json
|
||||
{
|
||||
"alert": {
|
||||
"id": "{{alert_id}}",
|
||||
"name": "{{alert_name}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"type": "{{alert_for}}"
|
||||
},
|
||||
"monitor": {
|
||||
"name": "{{alert_for}}",
|
||||
"current": "{{alert_value}}",
|
||||
"threshold": "{{alert_failure_threshold}}"
|
||||
},
|
||||
"context": {
|
||||
"triggered": {{#is_triggered}}true{{/is_triggered}}{{#is_resolved}}false{{/is_resolved}},
|
||||
"resolved": {{#is_resolved}}true{{/is_resolved}}{{#is_triggered}}false{{/is_triggered}},
|
||||
"description": "{{alert_message}}"
|
||||
},
|
||||
"links": {
|
||||
"monitor": "{{alert_cta_url}}",
|
||||
"site": "{{site_url}}"
|
||||
},
|
||||
"timestamps": {
|
||||
"iso": "{{alert_timestamp}}",
|
||||
"unix": {{timestamp_unix}}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Security Best Practices {#security-best-practices}
|
||||
|
||||
### 1. Always Use HTTPS {#security-https}
|
||||
|
||||
Only send webhooks to HTTPS endpoints to protect sensitive data in transit.
|
||||
|
||||
### 2. Store Secrets in Environment Variables {#security-env-vars}
|
||||
|
||||
```bash
|
||||
# .env file
|
||||
API_KEY=secret-value-here
|
||||
WEBHOOK_URL=https://api.example.com/webhook
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"Authorization": "Bearer $API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Implement Webhook Signatures {#security-signatures}
|
||||
|
||||
For custom webhooks, verify requests came from Kener:
|
||||
|
||||
**Headers:**
|
||||
|
||||
```json
|
||||
{
|
||||
"X-Webhook-Secret": "$WEBHOOK_SECRET"
|
||||
}
|
||||
```
|
||||
|
||||
Validate this secret on your receiving endpoint.
|
||||
|
||||
### 4. Rate Limiting {#security-rate-limiting}
|
||||
|
||||
Be aware of service rate limits:
|
||||
|
||||
- Telegram: 30 messages/second
|
||||
- Discord: 30 requests/60 seconds per webhook
|
||||
- Slack: ~1 request/second average
|
||||
- PagerDuty: 120 events/minute per integration
|
||||
|
||||
Adjust alert thresholds to avoid hitting limits.
|
||||
|
||||
### 5. Retry Logic {#security-retry-logic}
|
||||
|
||||
Services may be temporarily unavailable. Implement retry logic on your receiving endpoint for idempotent operations.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Webhook Not Firing {#troubleshooting-not-firing}
|
||||
|
||||
**Check:**
|
||||
|
||||
1. Is trigger active?
|
||||
2. Is trigger selected in alert configuration?
|
||||
3. Check Kener logs for errors
|
||||
4. Test endpoint independently with curl
|
||||
|
||||
### 401/403 Authentication Errors {#troubleshooting-auth-errors}
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Verify API keys/tokens are correct
|
||||
- Check environment variables are loaded
|
||||
- Ensure proper header format (`Bearer`, `GenieKey`, etc.)
|
||||
- Confirm API key has required permissions
|
||||
|
||||
### Timeout Errors {#troubleshooting-timeout}
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Check if endpoint is reachable
|
||||
- Verify firewall rules allow outbound connections
|
||||
- Test with webhook.site to isolate issue
|
||||
- Check endpoint response time (should be < 10s)
|
||||
|
||||
### Variables Not Substituting {#troubleshooting-variables}
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Check variable spelling and case
|
||||
- Verify Mustache syntax: `{{var}}`, not `{var}`
|
||||
- Test with simple payload first
|
||||
- Check Kener logs for template errors
|
||||
|
||||
### Rate Limit Exceeded {#troubleshooting-rate-limit}
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Increase alert failure thresholds
|
||||
- Use separate webhooks for different channels
|
||||
- Implement backoff logic in receiving endpoint
|
||||
- Consider batching alerts (custom endpoint)
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
- [Templates](/docs/alerting/templates) - Learn about customizing notification templates
|
||||
- [Triggers](/docs/alerting/triggers) - Understand trigger configuration
|
||||
- [Alert Configurations](/docs/alerting/alert-configurations) - Create alerts that use webhooks
|
||||
- Use Mustache variables from [Templates](/docs/v4/alerting/templates)
|
||||
- Use `$VAR_NAME` for secrets in URL/headers/body
|
||||
- For webhook headers, use `[{"key":"...","value":"..."}]`
|
||||
|
||||
@@ -198,4 +198,5 @@ status.yourdomain.com {
|
||||
|
||||
- Set up [Monitors](/docs/monitors)
|
||||
- Configure [Incidents](/docs/incidents)
|
||||
- Configure [Sharing Monitors](/docs/v4/sharing)
|
||||
- Explore the [API Reference](/docs/api-reference)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
---
|
||||
title: Basic Setup
|
||||
description: Learn how to set up Kener with a simple configuration
|
||||
---
|
||||
|
||||
Once Kener is running, you have a basic status page up and running. However, you may want to customize it further to fit your needs. This section walks through the basic setup options available in Kener.
|
||||
|
||||
Most of the default settings are designed to work out of the box, but you can customize them by using the dashboard.
|
||||
|
||||
## Customize {#customize}
|
||||
|
||||
### Site Configuration {#site-configuration}
|
||||
|
||||
- **Site Name**: The name of your status page. This will be displayed in the header and title of the page.
|
||||
- **Site URL**: The URL of your status page. This is used for sharing and linking to your status page.
|
||||
- **Home Path**: The path to the home page of your status page. This is used for routing and navigation within your status page. By default, it is set to `/`, but you can change it to something else if you want.
|
||||
|
||||
> [!NOTE]
|
||||
> If you are hosting the site on a subpath, make sure to set the Home Path to the correct value. For example, if your site is hosted at `https://example.com/status`, then you should set the Home Path to `/status`.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Setting up the Site URL correctly is very important for Kener to function properly.
|
||||
|
||||
## Add your first monitor (website) {#add-your-first-monitor-website}
|
||||
|
||||
Use this flow to add a simple HTTP/website monitor.
|
||||
|
||||
### Step 1: Create a new monitor {#create-new-monitor}
|
||||
|
||||
1. Open **Manage → Monitors**.
|
||||
2. Click **New Monitor**.
|
||||
3. In **General Settings**, set at least:
|
||||
- **Tag** (unique id, e.g. `homepage`)
|
||||
- **Name** (display name)
|
||||
- **Monitor Type** = API/Website monitor
|
||||
|
||||
### Step 2: Configure website check {#configure-website-check}
|
||||
|
||||
In **Configuration**, set your target URL and check interval/cron.
|
||||
|
||||
Example target URL:
|
||||
|
||||
```text
|
||||
https://example.com
|
||||
```
|
||||
|
||||
### Step 3: Make it visible on status page {#make-monitor-visible}
|
||||
|
||||
Open **Page Visibility** and add the monitor to at least one page.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> A monitor not added to any page will not appear on the public status page.
|
||||
|
||||
### Step 4: Verify {#verify-monitor}
|
||||
|
||||
1. Save the monitor.
|
||||
2. Open **View** from the monitor page.
|
||||
3. Confirm it appears and starts collecting checks.
|
||||
|
||||
For all API/website monitor options, see [API Monitors](/docs/v4/monitors/api).
|
||||
@@ -0,0 +1,150 @@
|
||||
---
|
||||
title: Alerting Trigger Examples
|
||||
description: Copy-ready webhook payload examples for Telegram and other common providers
|
||||
---
|
||||
|
||||
Use these with a **Webhook** trigger. All examples use runtime-supported Mustache variables and `$ENV_VAR` secret interpolation.
|
||||
|
||||
## Telegram {#telegram}
|
||||
|
||||
```text
|
||||
URL: https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_id": "$TELEGRAM_CHAT_ID",
|
||||
"text": "{{#is_triggered}}🚨 *Alert Triggered*{{/is_triggered}}{{#is_resolved}}✅ *Alert Resolved*{{/is_resolved}}\n\n*{{alert_name}}*\nStatus: {{alert_status}}\nSeverity: {{alert_severity}}\nType: {{alert_for}}\nValue: {{alert_value}}\n\n{{alert_message}}\n\n[{{alert_cta_text}}]({{alert_cta_url}})",
|
||||
"parse_mode": "Markdown",
|
||||
"disable_web_page_preview": true
|
||||
}
|
||||
```
|
||||
|
||||
## PagerDuty Events API v2 {#pagerduty}
|
||||
|
||||
```text
|
||||
URL: https://events.pagerduty.com/v2/enqueue
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"routing_key": "$PAGERDUTY_INTEGRATION_KEY",
|
||||
"event_action": "{{#is_triggered}}trigger{{/is_triggered}}{{#is_resolved}}resolve{{/is_resolved}}",
|
||||
"dedup_key": "kener-{{alert_id}}",
|
||||
"payload": {
|
||||
"summary": "{{alert_name}} - {{alert_status}}",
|
||||
"source": "{{alert_source}}",
|
||||
"severity": "{{#is_triggered}}critical{{/is_triggered}}{{#is_resolved}}info{{/is_resolved}}",
|
||||
"timestamp": "{{alert_timestamp}}"
|
||||
},
|
||||
"links": [{ "href": "{{alert_cta_url}}", "text": "{{alert_cta_text}}" }]
|
||||
}
|
||||
```
|
||||
|
||||
## Microsoft Teams (Incoming Webhook) {#teams}
|
||||
|
||||
```text
|
||||
URL: $TEAMS_WEBHOOK_URL
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"@type": "MessageCard",
|
||||
"@context": "http://schema.org/extensions",
|
||||
"themeColor": "{{#is_triggered}}D13438{{/is_triggered}}{{#is_resolved}}28A745{{/is_resolved}}",
|
||||
"summary": "{{alert_name}}",
|
||||
"sections": [
|
||||
{
|
||||
"activityTitle": "{{#is_triggered}}🚨 Alert Triggered{{/is_triggered}}{{#is_resolved}}✅ Alert Resolved{{/is_resolved}}",
|
||||
"facts": [
|
||||
{ "name": "Status", "value": "{{alert_status}}" },
|
||||
{ "name": "Severity", "value": "{{alert_severity}}" },
|
||||
{ "name": "Type", "value": "{{alert_for}}" },
|
||||
{ "name": "Value", "value": "{{alert_value}}" }
|
||||
],
|
||||
"markdown": true
|
||||
}
|
||||
],
|
||||
"potentialAction": [
|
||||
{
|
||||
"@type": "OpenUri",
|
||||
"name": "{{alert_cta_text}}",
|
||||
"targets": [{ "os": "default", "uri": "{{alert_cta_url}}" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Opsgenie {#opsgenie}
|
||||
|
||||
```text
|
||||
URL: https://api.opsgenie.com/v2/alerts
|
||||
Headers: [{"key":"Authorization","value":"GenieKey $OPSGENIE_API_KEY"},{"key":"Content-Type","value":"application/json"}]
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "{{alert_name}}",
|
||||
"alias": "kener-alert-{{alert_id}}",
|
||||
"description": "{{alert_message}}",
|
||||
"priority": "{{#is_triggered}}P1{{/is_triggered}}{{#is_resolved}}P5{{/is_resolved}}",
|
||||
"source": "{{alert_source}}",
|
||||
"details": {
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}",
|
||||
"type": "{{alert_for}}",
|
||||
"value": "{{alert_value}}",
|
||||
"time": "{{alert_timestamp}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Generic custom API {#generic-api}
|
||||
|
||||
```text
|
||||
URL: https://api.example.com/alerts
|
||||
Headers: [{"key":"Authorization","value":"Bearer $CUSTOM_API_TOKEN"},{"key":"Content-Type","value":"application/json"}]
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"alert": {
|
||||
"id": "{{alert_id}}",
|
||||
"name": "{{alert_name}}",
|
||||
"status": "{{alert_status}}",
|
||||
"severity": "{{alert_severity}}"
|
||||
},
|
||||
"monitor": {
|
||||
"type": "{{alert_for}}",
|
||||
"value": "{{alert_value}}",
|
||||
"failure_threshold": "{{alert_failure_threshold}}",
|
||||
"success_threshold": "{{alert_success_threshold}}"
|
||||
},
|
||||
"context": {
|
||||
"message": "{{alert_message}}",
|
||||
"timestamp": "{{alert_timestamp}}",
|
||||
"url": "{{alert_cta_url}}"
|
||||
},
|
||||
"site": {
|
||||
"name": "{{site_name}}",
|
||||
"url": "{{site_url}}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Required environment variables {#required-env}
|
||||
|
||||
```env
|
||||
TELEGRAM_BOT_TOKEN=...
|
||||
TELEGRAM_CHAT_ID=...
|
||||
PAGERDUTY_INTEGRATION_KEY=...
|
||||
TEAMS_WEBHOOK_URL=...
|
||||
OPSGENIE_API_KEY=...
|
||||
CUSTOM_API_TOKEN=...
|
||||
```
|
||||
|
||||
## Notes {#notes}
|
||||
|
||||
- Use `$VAR_NAME` for secrets (not `{{env.VAR_NAME}}`).
|
||||
- Webhook headers must be provided as an array of key/value entries, e.g. `[{"key":"Content-Type","value":"application/json"}]`.
|
||||
- Validate provider payloads with test channels before production routing.
|
||||
@@ -0,0 +1,103 @@
|
||||
---
|
||||
title: API Custom Eval Examples
|
||||
description: Ready-to-use API monitor eval functions for UP, DEGRADED, and DOWN states
|
||||
---
|
||||
|
||||
Use these snippets directly in an API monitor.
|
||||
|
||||
## JSONPlaceholder todos threshold eval {#todos-threshold-eval}
|
||||
|
||||
API: `https://jsonplaceholder.typicode.com/todos`
|
||||
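Response: an array of todo objects, each with a boolean `completed` field. The eval below counts completed todos: 10 or fewer returns `UP`, 11–20 returns `DEGRADED`, and more than 20 returns `DOWN`.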
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"userId": 1,
|
||||
"id": 1,
|
||||
"title": "delectus aut autem",
|
||||
"completed": false
|
||||
},
|
||||
{
|
||||
"userId": 1,
|
||||
"id": 2,
|
||||
"title": "quis ut nam facilis et officia qui",
|
||||
"completed": true
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
### Eval function:
|
||||
|
||||
```javascript
|
||||
(async function (statusCode, responseTime, responseRaw) {
|
||||
if (statusCode !== 200) {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
}
|
||||
|
||||
let todos
|
||||
try {
|
||||
todos = JSON.parse(responseRaw)
|
||||
} catch {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
}
|
||||
|
||||
if (!Array.isArray(todos)) {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
}
|
||||
|
||||
const completedCount = todos.filter((t) => t && t.completed === true).length
|
||||
|
||||
if (completedCount <= 10) {
|
||||
return { status: "UP", latency: responseTime }
|
||||
}
|
||||
|
||||
if (completedCount <= 20) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
## POST request body with $SECRET_PARAM (no eval) {#post-secret-param-body}
|
||||
|
||||
```json
|
||||
{
|
||||
"method": "POST",
|
||||
"headers": [{ "key": "Content-Type", "value": "application/json" }],
|
||||
"body": "{\"token\":\"$SECRET_PARAM\",\"action\":\"ping\"}"
|
||||
}
|
||||
```
|
||||
|
||||
```env
|
||||
SECRET_PARAM=your_real_secret_value
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> `$SECRET_PARAM` is resolved from environment variables at runtime. If `SECRET_PARAM` is not set in `.env` (or process env), your request body will not be populated with the secret value.
|
||||
|
||||
## Cheerio HTML content check {#cheerio-html-check-eval}
|
||||
|
||||
```javascript
|
||||
(async function (statusCode, responseTime, responseRaw, modules) {
|
||||
if (statusCode !== 200) {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
}
|
||||
|
||||
let html = responseRaw
|
||||
const $ = modules.cheerio.load(html)
|
||||
const header = $('[class^="ContentBox_header"]')
|
||||
|
||||
let status = true
|
||||
if (!header.text().includes("We’re fully operational")) {
|
||||
status = false
|
||||
}
|
||||
|
||||
return {
|
||||
status: status ? "UP" : "DOWN",
|
||||
latency: responseTime
|
||||
}
|
||||
})
|
||||
```
|
||||
@@ -0,0 +1,116 @@
|
||||
---
|
||||
title: Email Templates
|
||||
description: Generate and customize built-in email templates with template variables and usage references
|
||||
---
|
||||
|
||||
Use **Manage → Templates** to edit Kener’s built-in general email templates.
|
||||
|
||||
## Quick setup {#quick-setup}
|
||||
|
||||
1. Open **Manage → Templates**.
|
||||
2. Select a template from the dropdown.
|
||||
3. Edit **Subject** and **HTML Body**.
|
||||
4. Click **Update Template**.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> You need an `admin` or `editor` role to update templates.
|
||||
|
||||
## Available templates {#available-templates}
|
||||
|
||||
Kener currently includes these templates:
|
||||
|
||||
| Template ID | Purpose |
|
||||
| --------------------------- | ------------------------------------------------- |
|
||||
| `invite_user` | Team member invitation email |
|
||||
| `verify_email` | Email verification link email |
|
||||
| `forgot_password` | Password reset link email |
|
||||
| `subscription_account_code` | OTP code for subscription account verification |
|
||||
| `subscription_update` | Incident/maintenance update email for subscribers |
|
||||
|
||||
## How customization works {#how-customization-works}
|
||||
|
||||
Each template has:
|
||||
|
||||
- `template_subject`
|
||||
- `template_html_body`
|
||||
- `template_text_body` (used as plain-text body when provided)
|
||||
|
||||
Kener renders subject and HTML body with Mustache variables.
|
||||
|
||||
> [!NOTE]
|
||||
> If `template_text_body` is empty, Kener falls back to generating plain-text content from rendered HTML.
|
||||
|
||||
## Common variables {#common-variables}
|
||||
|
||||
All templates can use site-level variables:
|
||||
|
||||
| Variable |
|
||||
| --------------- |
|
||||
| `{{site_name}}` |
|
||||
| `{{site_url}}` |
|
||||
| `{{logo}}` |
|
||||
| `{{favicon}}` |
|
||||
| `{{tagline}}` |
|
||||
|
||||
## Variables by template {#variables-by-template}
|
||||
|
||||
### invite_user {#invite-user-variables}
|
||||
|
||||
| Variable | Description |
|
||||
| ------------------------------------------------------------------------- | ------------------------- |
|
||||
| `{{site_name}}`, `{{site_url}}`, `{{logo}}`, `{{favicon}}`, `{{tagline}}` | Site branding/context |
|
||||
| `{{invitation_link}}` | Invitation acceptance URL |
|
||||
|
||||
### verify_email {#verify-email-variables}
|
||||
|
||||
| Variable | Description |
|
||||
| ------------------------------------------------------------------------- | ---------------------- |
|
||||
| `{{site_name}}`, `{{site_url}}`, `{{logo}}`, `{{favicon}}`, `{{tagline}}` | Site branding/context |
|
||||
| `{{verification_link}}` | Email verification URL |
|
||||
|
||||
### forgot_password {#forgot-password-variables}
|
||||
|
||||
| Variable | Description |
|
||||
| ------------------------------------------------------------------------- | --------------------- |
|
||||
| `{{site_name}}`, `{{site_url}}`, `{{logo}}`, `{{favicon}}`, `{{tagline}}` | Site branding/context |
|
||||
| `{{reset_link}}` | Password reset URL |
|
||||
|
||||
### subscription_account_code {#subscription-account-code-variables}
|
||||
|
||||
| Variable | Description |
|
||||
| ------------------------------------------------------------------------- | --------------------------- |
|
||||
| `{{site_name}}`, `{{site_url}}`, `{{logo}}`, `{{favicon}}`, `{{tagline}}` | Site branding/context |
|
||||
| `{{email_code}}` | OTP code sent to subscriber |
|
||||
|
||||
### subscription_update {#subscription-update-variables}
|
||||
|
||||
| Variable | Description |
|
||||
| ------------------------------------------------------------------------- | ---------------------------------------------------------- |
|
||||
| `{{site_name}}`, `{{site_url}}`, `{{logo}}`, `{{favicon}}`, `{{tagline}}` | Site branding/context |
|
||||
| `{{title}}` | Event title |
|
||||
| `{{update_subject}}` | Event subject line (also used by default subject template) |
|
||||
| `{{update_text}}` | Main update content |
|
||||
| `{{cta_text}}` | Call-to-action text |
|
||||
| `{{cta_url}}` | Call-to-action URL |
|
||||
| `{{update_id}}` | Event/update identifier |
|
||||
| `{{event_type}}` | Event category |
|
||||
|
||||
## Best practices {#best-practices}
|
||||
|
||||
- Keep subject lines short and user-facing.
|
||||
- Prefer editing wording and branding, not template structure.
|
||||
- Keep Mustache variables exactly as documented.
|
||||
- Test critical flows after updates (invite, verify, reset, subscription update).
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
- **Variables render blank**: confirm variable name spelling and braces.
|
||||
- **Emails fail after edits**: verify that the template still contains the required flow links (`invitation_link`, `verification_link`, `reset_link`).
|
||||
- **Defaults did not come back after restart**: seeds only insert missing templates; existing customized templates are not overwritten.
|
||||
|
||||
## Related pages {#related-pages}
|
||||
|
||||
- [Email Setup](/docs/v4/setup/email-setup)
|
||||
- [User Management](/docs/v4/user-management)
|
||||
- [User Subscriptions](/docs/v4/subscriptions)
|
||||
- [Alerting Templates](/docs/v4/alerting/templates)
|
||||
@@ -148,62 +148,45 @@ This provides complete context about:
|
||||
|
||||
When the alert resolves, a detailed closure update is added:
|
||||
|
||||
```markdown
|
||||
The alert has been resolved, Total duration: 47 minutes
|
||||
|
||||
#### Alert Details
|
||||
|
||||
| Setting | Value |
|
||||
title: Auto-Generated Incidents
|
||||
description: Quick reference for incident creation via alerting
|
||||
| :-------------------- | :---------- |
|
||||
| **Monitor Name** | Payment API |
|
||||
| **Incident Status** | RESOLVED |
|
||||
| **Monitor Tag** | payment-api |
|
||||
| **Alert Type** | STATUS |
|
||||
| **Alert Value** | DOWN |
|
||||
| **Severity** | CRITICAL |
|
||||
| **Failure Threshold** | 3 |
|
||||
| **Success Threshold** | 5 |
|
||||
```
|
||||
| **Monitor Name** | Payment API |
|
||||
Kener can auto-create incidents from alert configurations.
|
||||
| **Monitor Tag** | payment-api |
|
||||
|
||||
## How it works {#how-it-works}
|
||||
|
||||
| **Alert Value** | DOWN |
|
||||
|
||||
1. Alert triggers.
|
||||
2. Alert has **Create Incident = YES**.
|
||||
3. Incident is created and monitor is attached.
|
||||
4. When alert resolves, incident is resolved automatically.
|
||||
| **Failure Threshold** | 3 |
|
||||
|
||||
## Where to configure {#where-to-configure}
|
||||
|
||||
This includes:
|
||||
Use **Manage → Alerts → Alert Configurations**.
|
||||
|
||||
- Total incident duration
|
||||
- Alert configuration
|
||||
|
||||
## Notes {#notes}
|
||||
|
||||
- Resolution confirmation
|
||||
- Success threshold that was met
|
||||
- Auto-generated incidents are ideal for critical, user-facing alerts.
|
||||
- Tune thresholds to avoid noisy incident creation.
|
||||
|
||||
## Configuration Requirements {#configuration-requirements}
|
||||
## See also {#see-also}
|
||||
|
||||
To enable auto-generated incidents:
|
||||
|
||||
### 1. Create Alert Configuration {#create-alert-config}
|
||||
|
||||
Navigate to **Manage > Alerts > Create Alert**
|
||||
|
||||
**Configure:**
|
||||
|
||||
- Monitor to watch
|
||||
- Alert type (STATUS, LATENCY, UPTIME)
|
||||
- Alert value (threshold)
|
||||
- Failure threshold
|
||||
- Success threshold
|
||||
- Severity (CRITICAL or WARNING)
|
||||
|
||||
**Enable Incident Creation:**
|
||||
|
||||
- Set **Create Incident** to **YES**
|
||||
|
||||
See [Alert Configurations](/docs/alerting/alert-configurations) for complete details.
|
||||
|
||||
### 2. Configure Triggers (Optional) {#configure-triggers}
|
||||
|
||||
While triggers are optional for incident creation, they enable notifications:
|
||||
|
||||
**Create Triggers:**
|
||||
|
||||
- Discord
|
||||
- Slack
|
||||
- Email
|
||||
- [Alert Configurations](/docs/v4/alerting/alert-configurations)
|
||||
- [Triggers](/docs/v4/alerting/triggers)
|
||||
- [Creating and Managing Incidents](/docs/v4/incidents/creating-managing)
|
||||
- Webhook
|
||||
|
||||
**Attach to Alert:**
|
||||
@@ -306,13 +289,11 @@ A monitor can have multiple alert configurations:
|
||||
|
||||
**Example:**
|
||||
|
||||
```
|
||||
Monitor: api-gateway
|
||||
|
||||
Alert 1: STATUS - DOWN (failure: 1)
|
||||
Alert 2: LATENCY - 1000ms (failure: 5)
|
||||
Alert 3: UPTIME - 99.9% (failure: 10)
|
||||
```
|
||||
|
||||
**Each Alert:**
|
||||
|
||||
@@ -367,7 +348,7 @@ If subscription system is configured:
|
||||
- Includes resolution update
|
||||
- Shows total duration
|
||||
|
||||
See [Subscriptions](/docs/subscriptions) for setup.
|
||||
See [Subscriptions](/docs/v4/subscriptions) for setup.
|
||||
|
||||
## Alert Logs {#alert-logs}
|
||||
|
||||
@@ -484,13 +465,11 @@ See [Troubleshooting Triggers](/docs/alerting/triggers#troubleshooting-triggers)
|
||||
|
||||
**Workflow:**
|
||||
|
||||
```
|
||||
1. Alert triggers → Auto-creates incident
|
||||
2. You investigate → Add IDENTIFIED update
|
||||
3. You deploy fix → Add MONITORING update
|
||||
4. Alert resolves → Auto-adds RESOLVED update
|
||||
5. You add post-mortem → Add final manual update
|
||||
```
|
||||
|
||||
### Threshold Tuning {#threshold-tuning}
|
||||
|
||||
|
||||
@@ -32,189 +32,78 @@ A clear, concise description of the issue that will be visible to users.
|
||||
- Be specific but brief
|
||||
- Mention the affected service
|
||||
- Avoid internal jargon
|
||||
|
||||
**Examples:**
|
||||
title: Creating and Managing Incidents
|
||||
description: Create incidents, add affected monitors, post updates, and resolve incidents
|
||||
|
||||
- ✅ "API Gateway Responding Slowly"
|
||||
- ✅ "Database Connection Issues"
|
||||
Use **Manage → Incidents** to create and manage incidents.
|
||||
- ❌ "DB01 high CPU" (too technical)
|
||||
- ❌ "Issue" (too vague)
|
||||
|
||||
#### Start Date/Time (Required) {#start-datetime}
|
||||
## Create an incident {#create-an-incident}
|
||||
|
||||
When did the incident begin?
|
||||
1. Open **New Incident**.
|
||||
2. Fill required fields:
|
||||
- **Title**
|
||||
- **Start Date/Time** (entered in local timezone)
|
||||
3. Optional:
|
||||
- **Global visibility** toggle
|
||||
- Initial update message
|
||||
4. Add affected monitors (optional but recommended).
|
||||
5. Click **Create Incident**.
|
||||
|
||||
**Important Notes:**
|
||||
## Add affected monitors {#add-affected-monitors}
|
||||
|
||||
For each monitor, set an impact:
|
||||
|
||||
- `DOWN`
|
||||
- `DEGRADED`
|
||||
|
||||
You can add, remove, and change impact while editing the incident.
|
||||
|
||||
- Enter time in **your local timezone**
|
||||
- Kener stores it as UTC internally
|
||||
- Use datetime picker or type manually
|
||||
- Can be in the past (for recording historical incidents)
|
||||
- Should reflect when users were first affected
|
||||
|
||||
**Default:** Current date and time
|
||||
## Add updates and change state {#add-updates-and-change-state}
|
||||
|
||||
### Step 2: Initial Update (Optional) {#initial-update}
|
||||
Updates are timeline entries shown to users. When posting an update, choose a state:
|
||||
|
||||
Provide an initial status update describing what's happening.
|
||||
- `INVESTIGATING`
|
||||
- `IDENTIFIED`
|
||||
- `MONITORING`
|
||||
- `RESOLVED`
|
||||
|
||||
**Format:** Supports Markdown formatting
|
||||
When set to `RESOLVED`, the incident closes and the end time is recorded.
|
||||
|
||||
**When to Use:**
|
||||
## Edit an incident {#edit-an-incident}
|
||||
|
||||
- Describing initial symptoms
|
||||
- Explaining known scope
|
||||
- Setting expectations
|
||||
You can edit:
|
||||
|
||||
**Example:**
|
||||
- Title
|
||||
- Start time
|
||||
- Global visibility
|
||||
- Affected monitors and impact
|
||||
|
||||
```markdown
|
||||
We're experiencing elevated error rates on our API endpoints.
|
||||
Our team is investigating the root cause.
|
||||
Save changes to apply them immediately.
|
||||
|
||||
## Delete incident {#delete-incident}
|
||||
|
||||
**Affected Services:**
|
||||
Incidents can be deleted from the incident edit view.
|
||||
|
||||
- REST API
|
||||
- Webhook delivery
|
||||
> [!WARNING]
|
||||
> Deletion is irreversible and removes incident timeline context.
|
||||
|
||||
**Not Affected:**
|
||||
## Good practices {#good-practices}
|
||||
|
||||
- Dashboard UI
|
||||
- Data exports
|
||||
```
|
||||
- Keep titles user-facing and clear.
|
||||
- Add regular, concise updates during active incidents.
|
||||
- Keep monitor impact accurate as recovery progresses.
|
||||
|
||||
**Note:** This becomes the first update in the incident timeline with state "INVESTIGATING".
|
||||
## Related guides {#related-guides}
|
||||
|
||||
### Step 3: Affected Monitors (Optional) {#affected-monitors}
|
||||
|
||||
Select which monitors are impacted by this incident.
|
||||
|
||||
#### Adding Monitors {#adding-monitors}
|
||||
|
||||
1. Click **Add Monitor**
|
||||
2. Select a monitor from the dropdown
|
||||
3. Choose the impact level:
|
||||
- **DOWN** - Service completely unavailable
|
||||
- **DEGRADED** - Service partially available or slow
|
||||
4. Click **Add Monitor**
|
||||
|
||||
#### Multiple Monitors {#multiple-monitors}
|
||||
|
||||
You can add multiple monitors to a single incident. This is useful when:
|
||||
|
||||
- A backend issue affects multiple frontend services
|
||||
- Infrastructure problems impact several applications
|
||||
- Related services are all experiencing issues
|
||||
|
||||
#### Impact Level Guidance {#impact-guidance}
|
||||
|
||||
**Use DOWN when:**
|
||||
|
||||
- Monitor returns 100% errors
|
||||
- Service is completely inaccessible
|
||||
- All requests fail
|
||||
- Critical functionality broken
|
||||
|
||||
**Use DEGRADED when:**
|
||||
|
||||
- Monitor returns some errors (not all)
|
||||
- Service is slow but functional
|
||||
- Partial functionality unavailable
|
||||
- Intermittent issues
|
||||
|
||||
**Important:** The impact level you select here **overrides** the monitor's realtime status during the incident. See [Incident Impact on Monitoring](/docs/incidents/impact-on-monitoring) for details.
|
||||
|
||||
### Step 4: Create {#create-incident}
|
||||
|
||||
Click **Create Incident** to save the incident.
|
||||
|
||||
**What Happens Next:**
|
||||
|
||||
- Incident is created with state "INVESTIGATING"
|
||||
- If an initial update is provided, it's added as the first comment
|
||||
- Affected monitors immediately show the incident status
|
||||
- Incident appears on public status page
|
||||
- Incident is assigned a unique ID
|
||||
|
||||
## Editing an Existing Incident {#editing-incident}
|
||||
|
||||
Click the **Edit** button (pencil icon) on any incident to modify it.
|
||||
|
||||
### Editable Fields {#editable-fields}
|
||||
|
||||
#### Title {#edit-title}
|
||||
|
||||
You can change the incident title at any time. The change is immediate and reflects on the public status page.
|
||||
|
||||
**When to Edit:**
|
||||
|
||||
- Initial title was unclear
|
||||
- Scope changed (add/remove affected services)
|
||||
- More specific information available
|
||||
|
||||
#### Start Date/Time {#edit-start-datetime}
|
||||
|
||||
You can adjust when the incident actually started.
|
||||
|
||||
**When to Edit:**
|
||||
|
||||
- Initial entry was incorrect
|
||||
- Discovered issue started earlier than reported
|
||||
- Aligning timeline with logs
|
||||
|
||||
**Cannot Change:**
|
||||
|
||||
- Cannot set start time in the future
|
||||
- Cannot set start time after end time (if incident is resolved)
|
||||
|
||||
#### State {#edit-state}
|
||||
|
||||
**State is controlled by updates, not direct editing.** To change an incident's state, you must add an update (comment) with the new state.
|
||||
|
||||
See [Incident Updates](/docs/incidents/updates) for details.
|
||||
|
||||
### Managing Affected Monitors {#managing-monitors}
|
||||
|
||||
While editing an incident, you can:
|
||||
|
||||
#### Add More Monitors {#add-more-monitors}
|
||||
|
||||
Click **Add Monitor** and select additional monitors to add to the incident.
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Issue scope expanded
|
||||
- Additional services affected
|
||||
- Related problem discovered
|
||||
|
||||
#### Change Monitor Impact {#change-impact}
|
||||
|
||||
Click the **⋮** (three dots) menu on a monitor and select:
|
||||
|
||||
- **Down** - Change impact to DOWN
|
||||
- **Degraded** - Change impact to DEGRADED
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Partial recovery (DOWN → DEGRADED)
|
||||
- Worsening situation (DEGRADED → DOWN)
|
||||
- More accurate assessment available
|
||||
|
||||
#### Remove Monitors {#remove-monitors}
|
||||
|
||||
Click the **⋮** (three dots) menu on a monitor and select **Remove**.
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Monitor recovered (but incident ongoing for others)
|
||||
- Incorrectly added
|
||||
- Issue was unrelated
|
||||
|
||||
**Important:** Removing a monitor immediately restores its realtime monitoring status on the status page.
|
||||
|
||||
### Saving Changes {#saving-changes}
|
||||
|
||||
Click **Save Changes** to apply your edits.
|
||||
- [Incidents Overview](/docs/v4/incidents/overview)
|
||||
- [Impact on Monitoring](/docs/v4/incidents/impact-on-monitoring)
|
||||
- [Alert Configurations](/docs/v4/alerting/alert-configurations)
|
||||
|
||||
**What Changes Immediately:**
|
||||
|
||||
|
||||
@@ -1,430 +1,70 @@
|
||||
---
|
||||
title: "Incident Impact on Monitoring"
|
||||
description: "Understand how incidents override realtime monitoring data and affect what users see on the status page."
|
||||
title: Impact on Monitoring
|
||||
description: Understand incident status precedence and Event Display Settings
|
||||
---
|
||||
|
||||
One of the most important aspects of Kener's incident system is how incidents interact with realtime monitoring. When a monitor is part of an active incident, the incident's status **takes precedence** over the monitor's realtime check results.
|
||||
When a monitor is part of an active incident, incident impact can override realtime status shown on the status page.
|
||||
|
||||
## Why Status Override Matters {#why-override-matters}
|
||||
## Status precedence {#status-precedence}
|
||||
|
||||
Imagine this scenario:
|
||||
Kener applies status sources in this order (later overrides earlier):
|
||||
|
||||
1. Your API is experiencing intermittent issues (only 30% of requests failing)
|
||||
2. Realtime monitoring shows status as "DEGRADED"
|
||||
3. You investigate and confirm it's a critical issue affecting users
|
||||
4. You create an incident marking the API as "DOWN"
|
||||
5. Five minutes later, monitoring catches a successful check and tries to show "UP"
|
||||
`default status → realtime monitor result → incident impact → maintenance impact`
|
||||
|
||||
**Without status override:** Your status page would show conflicting information - the monitoring says "UP" but your incident says there's a problem.
|
||||
This ensures users see consistent incident/maintenance communication while checks continue in the background.
|
||||
|
||||
**With status override:** The incident's "DOWN" status takes precedence, ensuring users see consistent information while you work on the issue.
|
||||
## Incident impact values {#incident-impact-values}
|
||||
|
||||
## How Status Precedence Works {#how-precedence-works}
|
||||
Set impact per monitor in an incident:
|
||||
|
||||
Kener evaluates monitor status in the following order (later entries override earlier ones):
|
||||
- `DOWN`
|
||||
- `DEGRADED`
|
||||
|
||||
### 1. Default Status (Lowest Priority) {#default-status}
|
||||
Use `DOWN` for full outage and `DEGRADED` for partial impact.
|
||||
|
||||
If a monitor has a `default_status` configured (UP, DOWN, or DEGRADED), this is applied first.
|
||||
## Realtime checks still run {#realtime-checks-still-run}
|
||||
|
||||
**Use Case:** Monitors that should show a specific state by default (useful for maintenance or controlled rollouts).
|
||||
Incident override does **not** stop monitor execution.
|
||||
|
||||
### 2. Realtime Monitoring Data {#realtime-data}
|
||||
- Monitoring jobs still run on schedule
|
||||
- Data points are still stored
|
||||
- Incident impact only affects effective/public status
|
||||
|
||||
The actual monitoring check results from your configured monitor type (API, Ping, TCP, etc.).
|
||||
## Event Display Settings {#event-display-settings}
|
||||
|
||||
**Normal Operation:** This is what determines status when no incidents or maintenance windows are active.
|
||||
Control incident visibility at:
|
||||
|
||||
### 3. Incident Status {#incident-status}
|
||||
**Manage → Site Configurations → Event Display Settings**
|
||||
|
||||
If the monitor is part of any active (non-RESOLVED) incident, the incident's `monitor_impact` overrides the realtime status.
|
||||
These incident settings are used when Kener builds notification/event payloads for users.
|
||||
|
||||
**Active Incidents:** Incidents with state = INVESTIGATING, IDENTIFIED, or MONITORING (not RESOLVED).
|
||||
### Incidents settings {#incidents-settings}
|
||||
|
||||
### 4. Maintenance Status (Highest Priority) {#maintenance-status}
|
||||
| Setting | Effect for users |
|
||||
| ------------------------------- | ------------------------------------------------------------------------ |
|
||||
| `incidents.enabled` | Master switch. If `false`, incidents are not included in event payloads. |
|
||||
| `incidents.ongoing.show` | If `true`, ongoing incidents are included. |
|
||||
| `incidents.resolved.show` | If `true`, resolved incidents are included. |
|
||||
| `incidents.resolved.maxCount` | Maximum number of resolved incidents returned. |
|
||||
| `incidents.resolved.daysInPast` | How far back to look for resolved incidents. |
|
||||
|
||||
If the monitor is in a maintenance window, the maintenance's `monitor_impact` takes ultimate precedence.
|
||||
### How values are applied {#how-values-are-applied}
|
||||
|
||||
**Note:** Maintenance is covered separately in the Maintenance documentation.
|
||||
At runtime, Kener checks these flags before querying incidents:
|
||||
|
||||
### Order of Precedence {#precedence-order}
|
||||
- If disabled, query returns no incidents.
|
||||
- If enabled, only selected categories (ongoing/resolved) are fetched.
|
||||
- Resolved incidents are limited by `maxCount` and `daysInPast`.
|
||||
|
||||
```
|
||||
Default Status → Realtime → Incident → Maintenance
|
||||
(increasing priority →)
|
||||
```
|
||||
So changing these values directly changes what incident events users receive/see.
|
||||
|
||||
**Example Scenario:**
|
||||
## Practical guidance {#practical-guidance}
|
||||
|
||||
```javascript
|
||||
// Monitor: api-gateway
|
||||
- Keep incident impact aligned with real user impact.
|
||||
- Update impact as recovery progresses.
|
||||
- Use Event Display Settings to reduce noisy timelines.
|
||||
|
||||
Default Status: UP
|
||||
Realtime Check: DOWN (503 error)
|
||||
Incident Impact: DEGRADED
|
||||
Maintenance: (none)
|
||||
## Related guides {#related-guides}
|
||||
|
||||
Final Status Shown: DEGRADED (incident overrides realtime DOWN)
|
||||
```
|
||||
|
||||
## Monitor Impact Levels {#impact-levels}
|
||||
|
||||
When adding a monitor to an incident, you specify the impact level:
|
||||
|
||||
### DOWN {#impact-down}
|
||||
|
||||
Monitor is completely unavailable or non-functional.
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- All requests fail
|
||||
- Service returns 100% errors
|
||||
- Critical functionality broken
|
||||
- Users cannot use the service
|
||||
|
||||
**What Users See:**
|
||||
|
||||
- Red status indicator
|
||||
- "Down" or "Offline" label
|
||||
- Incident linked on monitor page
|
||||
|
||||
### DEGRADED {#impact-degraded}
|
||||
|
||||
Monitor is partially available or performing poorly.
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- Increased error rate (but not 100%)
|
||||
- Slow response times
|
||||
- Partial functionality unavailable
|
||||
- Intermittent issues
|
||||
|
||||
**What Users See:**
|
||||
|
||||
- Yellow/orange status indicator
|
||||
- "Degraded" label
|
||||
- Incident linked on monitor page
|
||||
|
||||
### MAINTENANCE {#impact-maintenance}
|
||||
|
||||
Monitor is undergoing planned maintenance (rare for incidents).
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- Emergency maintenance during an incident
|
||||
- Planned fix requiring downtime
|
||||
- Controlled service disruption
|
||||
|
||||
**What Users See:**
|
||||
|
||||
- Gray status indicator
|
||||
- "Maintenance" label
|
||||
- Incident details available
|
||||
|
||||
**Note:** Typically you'd use a Maintenance Window instead of an incident with MAINTENANCE impact.
|
||||
|
||||
## Realtime Data Preservation {#data-preservation}
|
||||
|
||||
While incident status overrides what users see, Kener preserves the underlying monitoring data:
|
||||
|
||||
### Status Override Only {#status-override-only}
|
||||
|
||||
When an incident is active:
|
||||
|
||||
- **Status** is overridden by incident impact
|
||||
- **Latency** is preserved from realtime checks
|
||||
- **Error messages** cascade (see below)
|
||||
- **Timestamps** remain accurate
|
||||
|
||||
**Example:**
|
||||
|
||||
```javascript
|
||||
// During incident marked as DEGRADED:
|
||||
Realtime Check Result: {
|
||||
status: "DOWN", // Overridden
|
||||
latency: 523, // Preserved ✓
|
||||
error_message: "503 error" // See error cascading
|
||||
}
|
||||
|
||||
Status Shown to Users: {
|
||||
status: "DEGRADED", // From incident
|
||||
latency: 523, // From realtime
|
||||
type: "INCIDENT" // Indicator of override
|
||||
}
|
||||
```
|
||||
|
||||
### Error Message Cascading {#error-cascading}
|
||||
|
||||
Error messages follow the same precedence order with cascading:
|
||||
|
||||
1. Start with default status error (if any)
|
||||
2. Override with realtime error (if present)
|
||||
3. Override with incident error (if present)
|
||||
4. Override with maintenance error (if present)
|
||||
|
||||
**For Incidents:**
|
||||
|
||||
- Default error: "Status set by manual incident"
|
||||
- Realtime errors are preserved unless the incident has its own error
|
||||
|
||||
### Data Type Indicator {#data-type-indicator}
|
||||
|
||||
The monitoring result includes a `type` field indicating the source:
|
||||
|
||||
**Possible Values:**
|
||||
|
||||
- `REALTIME` - Normal monitoring check
|
||||
- `INCIDENT` - Status from incident
|
||||
- `MAINTENANCE` - Status from maintenance
|
||||
- `DEFAULT_STATUS` - From default status config
|
||||
- `TIMEOUT` - Check timed out
|
||||
|
||||
**Usage:** Helps distinguish between actual checks and manual overrides when analyzing data.
|
||||
|
||||
## When Override Starts and Stops {#override-timing}
|
||||
|
||||
### Override Begins {#override-begins}
|
||||
|
||||
When you add a monitor to an incident with a specific impact:
|
||||
|
||||
**Immediately:**
|
||||
|
||||
- Monitor status changes to incident impact
|
||||
- Realtime checks continue in background
|
||||
- Status page reflects incident status
|
||||
- Users see incident information
|
||||
|
||||
**Note:** The change is immediate upon saving, regardless of when the next monitoring check runs.
|
||||
|
||||
### Override Ends {#override-ends}
|
||||
|
||||
Override stops when:
|
||||
|
||||
**Incident is Resolved:**
|
||||
|
||||
- State changes to RESOLVED
|
||||
- Monitor returns to realtime status
|
||||
- May take 1-2 minutes to reflect
|
||||
|
||||
**Monitor Removed from Incident:**
|
||||
|
||||
- Clicking "Remove" on the monitor in incident edit
|
||||
- Monitor immediately returns to realtime status
|
||||
|
||||
**Incident is Closed/Deleted:**
|
||||
|
||||
- Incident status set to CLOSED
|
||||
- All monitors return to realtime
|
||||
- Historical data retained
|
||||
|
||||
### Transition Behavior {#transition-behavior}
|
||||
|
||||
When override ends:
|
||||
|
||||
**Realtime Status Returns:**
|
||||
|
||||
- Next monitoring check determines status
|
||||
- May show UP, DOWN, or DEGRADED based on actual state
|
||||
- Users see current realtime data
|
||||
|
||||
**Latency Continues:**
|
||||
|
||||
- Latency data was never overridden
|
||||
- Historical latency preserved during incident
|
||||
- No data gaps in latency charts
|
||||
|
||||
## Multiple Incidents for One Monitor {#multiple-incidents}
|
||||
|
||||
If a monitor is part of multiple active incidents:
|
||||
|
||||
**Precedence Rule:**
|
||||
|
||||
- DOWN takes precedence over DEGRADED
|
||||
- DEGRADED takes precedence over MAINTENANCE
|
||||
- Most severe impact is shown
|
||||
|
||||
**Example:**
|
||||
|
||||
```javascript
|
||||
Monitor: payment-api
|
||||
|
||||
Incident A: monitor_impact = DEGRADED
|
||||
Incident B: monitor_impact = DOWN
|
||||
|
||||
Status Shown: DOWN (most severe)
|
||||
```
|
||||
|
||||
**All Incidents Shown:**
|
||||
|
||||
- Status page links to all active incidents
|
||||
- Users can see full context
|
||||
- Each incident has its own timeline
|
||||
|
||||
## Realtime Checks Continue {#realtime-continues}
|
||||
|
||||
**Important:** Even when an incident overrides status, monitoring checks continue:
|
||||
|
||||
**Benefits:**
|
||||
|
||||
1. **Data Continuity** - No gaps in latency/uptime data
|
||||
2. **Automatic Detection** - Catch when issue actually resolves
|
||||
3. **Alert Triggers** - Alerts still evaluate realtime data
|
||||
4. **Historical Accuracy** - Complete monitoring history preserved
|
||||
|
||||
**What Gets Checked:**
|
||||
|
||||
- HTTP endpoints still receive requests
|
||||
- Ping monitors still send pings
|
||||
- Database connections still tested
|
||||
- All monitor types continue normal schedule
|
||||
|
||||
**What's Different:**
|
||||
|
||||
- Status shown to users is from incident
|
||||
- Incident state controls timeline
|
||||
- Manual updates take precedence
|
||||
|
||||
## Use Cases and Examples {#use-cases}
|
||||
|
||||
### Use Case 1: Partial Outage {#use-case-partial-outage}
|
||||
|
||||
**Scenario:**
|
||||
|
||||
- 20% of API requests failing
|
||||
- Monitoring shows DEGRADED (not DOWN)
|
||||
- But impact to users is severe
|
||||
|
||||
**Solution:**
|
||||
|
||||
- Create incident with monitor_impact = DOWN
|
||||
- Incident status (DOWN) overrides realtime (DEGRADED)
|
||||
- Users see accurate severity
|
||||
- You maintain control of messaging
|
||||
|
||||
### Use Case 2: False Positive Recovery {#use-case-false-positive}
|
||||
|
||||
**Scenario:**
|
||||
|
||||
- Database connection issues
|
||||
- Create incident, mark monitor as DOWN
|
||||
- Monitoring catches one successful check
|
||||
- Would show UP, but issue still present
|
||||
|
||||
**Solution:**
|
||||
|
||||
- Incident impact stays DOWN
|
||||
- Realtime "UP" is overridden
|
||||
- Users see consistent DOWN status
|
||||
- You resolve incident only when truly fixed
|
||||
|
||||
### Use Case 3: Cascading Failures {#use-case-cascading}
|
||||
|
||||
**Scenario:**
|
||||
|
||||
- Backend service fails
|
||||
- Affects 5 frontend monitors
|
||||
- Some show DOWN, some DEGRADED based on retry logic
|
||||
|
||||
**Solution:**
|
||||
|
||||
- Create single incident
|
||||
- Add all 5 monitors
|
||||
- Set appropriate impact for each
|
||||
- Users see coordinated incident
|
||||
- Single timeline for all affected services
|
||||
|
||||
### Use Case 4: Silent Issue {#use-case-silent-issue}
|
||||
|
||||
**Scenario:**
|
||||
|
||||
- Service technically "UP" (responds to checks)
|
||||
- But returns incorrect data
|
||||
- Monitoring shows UP
|
||||
- Users are affected
|
||||
|
||||
**Solution:**
|
||||
|
||||
- Create incident
|
||||
- Add monitor with impact DEGRADED or DOWN
|
||||
- Override realtime UP status
|
||||
- Communicate the actual issue to users
|
||||
- Resolve when data quality is restored
|
||||
|
||||
## Monitoring During Incidents {#monitoring-during}
|
||||
|
||||
### Viewing Realtime Data {#viewing-realtime}
|
||||
|
||||
**Dashboard View:**
|
||||
|
||||
- Incident page shows incident status
|
||||
- Monitor page shows incident indicator
|
||||
- Historical charts show realtime data points
|
||||
- Can distinguish incident period in charts
|
||||
|
||||
**API Access:**
|
||||
|
||||
- API returns both realtime and effective status
|
||||
- `status` = shown to users (incident override)
|
||||
- `realtime_status` = actual check result (if applicable)
|
||||
- `type` field indicates source
|
||||
|
||||
### Alert Behavior {#alert-behavior}
|
||||
|
||||
**Alerts Continue:**
|
||||
|
||||
- Alert configurations still evaluate realtime data
|
||||
- Can trigger during incidents
|
||||
- Can help detect resolution
|
||||
- Can create additional incidents if configured
|
||||
|
||||
**Best Practice:**
|
||||
|
||||
- Consider temporarily disabling alerts for monitors in incidents
|
||||
- Prevents alert fatigue
|
||||
- Focus on incident resolution
|
||||
- Re-enable after incident resolves
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Accurate Impact Levels {#accurate-impact}
|
||||
|
||||
**Be Honest:**
|
||||
|
||||
- Use DOWN when truly down
|
||||
- Use DEGRADED when partially available
|
||||
- Don't downplay severity
|
||||
|
||||
**Update as Needed:**
|
||||
|
||||
- Change impact if situation changes
|
||||
- DOWN → DEGRADED during partial recovery
|
||||
- DEGRADED → DOWN if worsening
|
||||
|
||||
### Remove When Recovered {#remove-when-recovered}
|
||||
|
||||
**Individual Monitor Recovery:**
|
||||
|
||||
- If one monitor recovers but the incident is ongoing
|
||||
- Remove that monitor from incident
|
||||
- Lets realtime status show
|
||||
- Other monitors remain in incident
|
||||
|
||||
**Don't Leave Lingering:**
|
||||
|
||||
- Remove monitors as they recover
|
||||
- Keep incident scope accurate
|
||||
- Users see correct per-monitor status
|
||||
|
||||
### Coordinate with Updates {#coordinate-updates}
|
||||
|
||||
**State and Status Should Match:**
|
||||
|
||||
- If changing state to MONITORING, impact might reduce to DEGRADED
|
||||
- If state is RESOLVED, remove monitors or close incident
|
||||
- Keep timeline and status consistent
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
- [Incident Updates](/docs/incidents/updates) - Learn how to update incident state and communicate progress
|
||||
- [Creating and Managing Incidents](/docs/incidents/creating-managing) - Back to incident management basics
|
||||
- [Auto-Generated Incidents](/docs/incidents/auto-generated) - How alerts automatically create and manage incidents
|
||||
- [Incidents Overview](/docs/v4/incidents/overview)
|
||||
- [Creating and Managing Incidents](/docs/v4/incidents/creating-managing)
|
||||
|
||||
@@ -1,188 +1,55 @@
|
||||
---
|
||||
title: "Incidents Overview"
|
||||
description: "Understanding incidents in Kener: What they are, their lifecycle, and how they help track service disruptions."
|
||||
title: Incidents Overview
|
||||
description: Track service disruptions with incidents, updates, and monitor impact in Kener
|
||||
---
|
||||
|
||||
Incidents in Kener are structured records that track service disruptions, outages, or degradations. They provide a chronological timeline of issues, affected services, and resolution progress, making them visible to your users on the status page.
|
||||
Incidents are records for outages or degradations that affect one or more monitors. They provide a user-facing timeline of what happened, current status, and resolution progress.
|
||||
|
||||
## What is an Incident? {#what-is-incident}
|
||||
## What an incident includes {#what-an-incident-includes}
|
||||
|
||||
An incident represents an unplanned service disruption or degradation that affects one or more monitors. Each incident includes:
|
||||
Each incident has:
|
||||
|
||||
- **Title** - A clear description of the issue
|
||||
- **Start Time** - When the incident began
|
||||
- **End Time** - When the incident was resolved (null if ongoing)
|
||||
- **State** - Current resolution progress
|
||||
- **Updates** - Timeline of status changes and communications
|
||||
- **Affected Monitors** - Services impacted by this incident
|
||||
- Title
|
||||
- Start time
|
||||
- Current state
|
||||
- Affected monitors and impact level (`DOWN` / `DEGRADED`)
|
||||
- Updates timeline
|
||||
|
||||
## Incident Lifecycle {#incident-lifecycle}
|
||||
## State lifecycle {#state-lifecycle}
|
||||
|
||||
Every incident progresses through a series of states that indicate resolution progress:
|
||||
Incidents typically move through:
|
||||
|
||||
### States {#states}
|
||||
`INVESTIGATING → IDENTIFIED → MONITORING → RESOLVED`
|
||||
|
||||
#### 1. INVESTIGATING {#investigating}
|
||||
- **INVESTIGATING**: team is actively diagnosing
|
||||
- **IDENTIFIED**: root cause is known
|
||||
- **MONITORING**: fix is applied, watching stability
|
||||
- **RESOLVED**: incident is closed
|
||||
|
||||
**Initial State** - Team is actively investigating the cause of the issue.
|
||||
When an incident becomes `RESOLVED`, Kener sets the end time automatically.
|
||||
|
||||
**When to Use:**
|
||||
## Sources {#sources}
|
||||
|
||||
- Issue just detected
|
||||
- Root cause unknown
|
||||
- Working to understand impact
|
||||
Incidents can come from:
|
||||
|
||||
**Example:** "We're investigating reports of slow API response times."
|
||||
- **Dashboard** (manual creation)
|
||||
- **Alerting** (auto-created when an alert is configured to create incidents)
|
||||
|
||||
#### 2. IDENTIFIED {#identified}
|
||||
## Public visibility {#public-visibility}
|
||||
|
||||
**Root Cause Known** - The team has identified what's causing the issue.
|
||||
Open incidents appear on the public status page and incident views.
|
||||
|
||||
**When to Use:**
|
||||
Monitor status shown to users follows incident/maintenance precedence (see [Impact on Monitoring](/docs/v4/incidents/impact-on-monitoring)).
|
||||
|
||||
- Root cause has been found
|
||||
- Working on implementing a fix
|
||||
- May not have a timeline yet
|
||||
## Keep it concise for users {#keep-it-concise-for-users}
|
||||
|
||||
**Example:** "Identified database connection pool exhaustion causing errors."
|
||||
- Use a clear title
|
||||
- Add only meaningful updates
|
||||
- Keep impact accurate per monitor
|
||||
- Resolve promptly when stable
|
||||
|
||||
#### 3. MONITORING {#monitoring}
|
||||
## Related guides {#related-guides}
|
||||
|
||||
**Fix Applied** - A fix has been implemented and is being monitored.
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- Fix has been deployed
|
||||
- Monitoring to ensure stability
|
||||
- Verifying the issue is truly resolved
|
||||
|
||||
**Example:** "Database connection pool increased. Monitoring for stability."
|
||||
|
||||
#### 4. RESOLVED {#resolved}
|
||||
|
||||
**Final State** - Issue is fully resolved and incident is closed.
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- Issue no longer occurring
|
||||
- Service fully operational
|
||||
- Confidence in resolution
|
||||
|
||||
**Example:** "All systems operational. Incident resolved."
|
||||
|
||||
### Automatic End Time {#automatic-end-time}
|
||||
|
||||
When an incident's state changes to **RESOLVED**, Kener automatically:
|
||||
|
||||
- Sets the `end_date_time` to the timestamp of the resolution update
|
||||
- Calculates the total incident duration
|
||||
- Displays the incident as closed on the status page
|
||||
|
||||
If you change the state back from RESOLVED to any other state, the `end_date_time` is cleared and the incident becomes ongoing again.
|
||||
|
||||
## Incident Sources {#incident-sources}
|
||||
|
||||
Incidents can be created from two sources:
|
||||
|
||||
### Manual Creation (Dashboard) {#manual-dashboard}
|
||||
|
||||
Created by users through the management dashboard at `/manage/app/incidents`.
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Manually tracking known issues
|
||||
- Creating historical records
|
||||
- Issues detected outside Kener
|
||||
|
||||
**Source Value:** `DASHBOARD`
|
||||
|
||||
### Auto-Generated (Alerts) {#auto-generated-alerts}
|
||||
|
||||
Automatically created when alert configurations trigger and have "Create Incident" enabled.
|
||||
|
||||
**Use Cases:**
|
||||
|
||||
- Automated incident creation
|
||||
- Alert-driven workflows
|
||||
- Consistent incident tracking
|
||||
|
||||
**Source Value:** `ALERT`
|
||||
|
||||
**Note:** Alert-generated incidents include a link back to the originating alert in their metadata.
|
||||
|
||||
## Public Visibility {#public-visibility}
|
||||
|
||||
All incidents with status "OPEN" are visible on your public status page:
|
||||
|
||||
### Home Page {#home-page-display}
|
||||
|
||||
- Shows recent incidents (configurable count)
|
||||
- Displays current state and affected monitors
|
||||
- Shows latest update for each incident
|
||||
|
||||
### Incidents Page {#incidents-page}
|
||||
|
||||
- Lists all open incidents in chronological order
|
||||
- Filterable by date range
|
||||
- Searchable by title or affected service
|
||||
|
||||
### Individual Incident View {#individual-view}
|
||||
|
||||
- Complete timeline of all updates
|
||||
- Full list of affected monitors with impact levels
|
||||
- Duration calculation
|
||||
- Shareable direct link
|
||||
|
||||
## Incident Timeline {#incident-timeline}
|
||||
|
||||
Each incident maintains a complete timeline through **updates** (also called comments). The timeline:
|
||||
|
||||
- Shows progression through states
|
||||
- Provides transparency to users
|
||||
- Records all communications
|
||||
- Displays in reverse chronological order (newest first)
|
||||
|
||||
Updates are covered in detail in [Incident Updates](/docs/incidents/updates).
|
||||
|
||||
## Affected Monitors {#affected-monitors}
|
||||
|
||||
Incidents can affect one or more monitors. For each affected monitor, you specify:
|
||||
|
||||
**Monitor Impact:**
|
||||
|
||||
- **DOWN** - Monitor is completely unavailable
|
||||
- **DEGRADED** - Monitor is partially unavailable or slow
|
||||
- **MAINTENANCE** - Monitor undergoing maintenance (rare for incidents)
|
||||
|
||||
When a monitor is part of an active incident, the incident's impact **overrides** the monitor's realtime status. This ensures consistency between what users see and what incidents report.
|
||||
|
||||
Learn more in [Incident Impact on Monitoring](/docs/incidents/impact-on-monitoring).
|
||||
|
||||
## Incident Duration {#incident-duration}
|
||||
|
||||
Duration is automatically calculated:
|
||||
|
||||
**For Ongoing Incidents:**
|
||||
|
||||
- Duration = Current Time - Start Time
|
||||
- Updates in real-time
|
||||
- Displayed as "Ongoing" or time elapsed
|
||||
|
||||
**For Resolved Incidents:**
|
||||
|
||||
- Duration = End Time - Start Time
|
||||
- Fixed value
|
||||
- Displays total incident length
|
||||
|
||||
**Display Formats:**
|
||||
|
||||
- Less than 1 hour: "45 minutes"
|
||||
- Less than 1 day: "3 hours 20 minutes"
|
||||
- Multiple days: "2 days 5 hours"
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
- [Creating and Managing Incidents](/docs/incidents/creating-managing) - Learn how to create and edit incidents
|
||||
- [Incident Updates](/docs/incidents/updates) - Understand how to add updates and change states
|
||||
- [Incident Impact on Monitoring](/docs/incidents/impact-on-monitoring) - How incidents override realtime status
|
||||
- [Auto-Generated Incidents](/docs/incidents/auto-generated) - How alerts create incidents automatically
|
||||
- [Creating and Managing Incidents](/docs/v4/incidents/creating-managing)
|
||||
- [Impact on Monitoring](/docs/v4/incidents/impact-on-monitoring)
|
||||
- [Alert Configurations](/docs/v4/alerting/alert-configurations)
|
||||
|
||||
@@ -60,69 +60,32 @@ The incident state after this update. This is **crucial** because:
|
||||
- State progression drives the incident lifecycle
|
||||
|
||||
**Available States:**
|
||||
title: Incident Updates
|
||||
description: Quick reference for posting timeline updates on incidents
|
||||
|
||||
- **INVESTIGATING** - Initial investigation
|
||||
- **IDENTIFIED** - Root cause found
|
||||
- **MONITORING** - Fix applied, watching for stability
|
||||
- **RESOLVED** - Issue fully resolved
|
||||
Incident updates are timeline entries used to communicate progress and move incident state.
|
||||
|
||||
**Important:** When you set state to **RESOLVED**, Kener automatically sets the incident's `end_date_time` to the update's timestamp.
|
||||
## Quick reference {#quick-reference}
|
||||
|
||||
### Timestamp (Required) {#timestamp}
|
||||
When posting an update, choose one state:
|
||||
|
||||
When this update was made.
|
||||
- `INVESTIGATING`
|
||||
- `IDENTIFIED`
|
||||
- `MONITORING`
|
||||
- `RESOLVED`
|
||||
|
||||
**Default:** Current date and time
|
||||
Setting `RESOLVED` closes the incident and sets the end time.
|
||||
|
||||
**Can Be Modified:**
|
||||
|
||||
- Useful for backdating updates
|
||||
- Aligning timeline with actual events
|
||||
- Recording updates made after the fact
|
||||
Use concise, user-facing text and include only meaningful changes.
|
||||
|
||||
**Display:** Shown in your local timezone but stored as UTC
|
||||
## See also {#see-also}
|
||||
|
||||
## Creating Updates {#creating-updates}
|
||||
|
||||
### During Incident Creation {#during-creation}
|
||||
|
||||
When creating a new incident, you can optionally provide an **Initial Update**:
|
||||
|
||||
1. Fill out the incident details
|
||||
2. Add text in the "Initial Update" field
|
||||
3. Uses Markdown formatting
|
||||
4. Automatically created with state "INVESTIGATING"
|
||||
5. Timestamp matches incident start time
|
||||
|
||||
### Adding Updates to Existing Incidents {#adding-updates}
|
||||
|
||||
1. Navigate to the incident detail page
|
||||
2. Click **Add Update** button
|
||||
3. Enter your message (Markdown supported)
|
||||
4. Select the new state
|
||||
5. Optionally adjust the timestamp
|
||||
6. Click **Add Update**
|
||||
|
||||
**What Happens:**
|
||||
|
||||
- Update is added to the incident timeline
|
||||
- Incident state changes to the update's state
|
||||
- If state is RESOLVED, end_date_time is set
|
||||
- Update appears immediately on status page
|
||||
- Subscribers receive notifications (if configured)
|
||||
|
||||
## Editing Updates {#editing-updates}
|
||||
|
||||
You can edit any existing update:
|
||||
|
||||
1. Find the update in the incident timeline
|
||||
2. Click the **Edit** (pencil icon) button
|
||||
3. Modify the message
|
||||
4. Change the state if needed
|
||||
5. Adjust the timestamp if needed
|
||||
6. Click **Save**
|
||||
|
||||
**Important State Changes:**
|
||||
- [Creating and Managing Incidents](/docs/v4/incidents/creating-managing)
|
||||
- [Impact on Monitoring](/docs/v4/incidents/impact-on-monitoring)
|
||||
**Important State Changes:**
|
||||
|
||||
**Moving to RESOLVED:**
|
||||
|
||||
@@ -434,7 +397,7 @@ When configured with the subscription system:
|
||||
- Incident is created
|
||||
- Incident is resolved
|
||||
|
||||
See [Subscription documentation](/docs/subscriptions) for setup.
|
||||
See [Subscription documentation](/docs/v4/subscriptions) for setup.
|
||||
|
||||
## Next Steps {#next-steps}
|
||||
|
||||
|
||||
@@ -73,81 +73,83 @@ Select whether this is a one-time or recurring maintenance:
|
||||
- Select the exact date and time when maintenance begins
|
||||
- Time is in your local timezone
|
||||
- Stored in UTC internally
|
||||
|
||||
**Example:**
|
||||
title: Creating and Managing Maintenances
|
||||
description: Create a maintenance window, choose impact, and manage recurring schedules
|
||||
|
||||
```
|
||||
Start: May 15, 2026 at 2:00 AM
|
||||
Use **Manage → Maintenances** to create and manage planned work windows.
|
||||
```
|
||||
|
||||
## Create a maintenance {#create-a-maintenance}
|
||||
|
||||
#### For Recurring Maintenances {#recurring-schedule}
|
||||
|
||||
**First Occurrence Date/Time** (Required)
|
||||
1. Click **New Maintenance**.
|
||||
2. Choose schedule type:
|
||||
- One-time
|
||||
- Recurring (RRULE)
|
||||
3. Fill required fields:
|
||||
- Title
|
||||
- Start date/time
|
||||
- Duration
|
||||
4. Select affected monitors and set impact.
|
||||
5. Click **Create Maintenance**.
|
||||
**First Occurrence Date/Time** (Required)
|
||||
|
||||
## Impact settings {#impact-settings}
|
||||
|
||||
- Select when the first maintenance occurrence happens
|
||||
- Subsequent occurrences use this same time of day
|
||||
- Date determines the starting point for the recurrence pattern
|
||||
Set per monitor:
|
||||
**RRULE Pattern** (Required)
|
||||
- `MAINTENANCE` (recommended)
|
||||
- `DOWN`
|
||||
- `DEGRADED`
|
||||
- `UP`
|
||||
Enter an iCalendar RRULE pattern directly or use the quick pattern buttons:
|
||||
|
||||
**RRULE Pattern** (Required)
|
||||
|
||||
Enter an iCalendar RRULE pattern directly or use the quick pattern buttons:
|
||||
## RRULE quick examples {#rrule-quick-examples}
|
||||
|
||||
**Quick Pattern Buttons:**
|
||||
|
||||
```text
|
||||
FREQ=WEEKLY;BYDAY=SU
|
||||
FREQ=DAILY
|
||||
FREQ=MONTHLY;BYMONTHDAY=1
|
||||
```
|
||||
|
||||
- **Every Sunday** - `FREQ=WEEKLY;BYDAY=SU`
|
||||
- **Every Day** - `FREQ=DAILY`
|
||||
For one-time maintenances, Kener uses:
|
||||
- **Weekdays** - `FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR`
|
||||
- **Every Monday** - `FREQ=WEEKLY;BYDAY=MO`
|
||||
- **Bi-weekly Monday** - `FREQ=WEEKLY;INTERVAL=2;BYDAY=MO`
|
||||
- **First of Month** - `FREQ=MONTHLY;BYMONTHDAY=1`
|
||||
|
||||
Click any button to automatically fill the RRULE field. You can also type a custom RRULE pattern directly.
|
||||
|
||||
**Preview Dates:**
|
||||
|
||||
As you configure the RRULE and start time, the form shows the next 5 upcoming occurrences based on your pattern. This helps verify your recurrence configuration is correct.
|
||||
|
||||
**Examples:**
|
||||
|
||||
_Weekly on Sundays:_
|
||||
|
||||
```
|
||||
RRULE: FREQ=WEEKLY;BYDAY=SU
|
||||
```text
|
||||
FREQ=MINUTELY;COUNT=1
|
||||
```
|
||||
|
||||
_Bi-weekly on Mondays:_
|
||||
## Edit maintenance {#edit-maintenance}
|
||||
|
||||
```
|
||||
RRULE: FREQ=WEEKLY;INTERVAL=2;BYDAY=MO
|
||||
```
|
||||
You can edit title, description, schedule, duration, monitor list, and status.
|
||||
|
||||
_Daily:_
|
||||
When schedule/duration changes, future generated events are recalculated.
|
||||
|
||||
```
|
||||
RRULE: FREQ=DAILY
|
||||
```
|
||||
## Activate or pause {#activate-or-pause}
|
||||
|
||||
_First of every month:_
|
||||
- `ACTIVE`: normal behavior and event generation
|
||||
- `INACTIVE`: disables maintenance behavior
|
||||
RRULE: FREQ=WEEKLY;BYDAY=SU
|
||||
|
||||
```
|
||||
RRULE: FREQ=MONTHLY;BYMONTHDAY=1
|
||||
```
|
||||
## Delete maintenance {#delete-maintenance}
|
||||
|
||||
For more complex patterns, see the [RRULE Patterns](/docs/maintenances/rrule-patterns) documentation.
|
||||
Deleting a maintenance removes it and related events.
|
||||
|
||||
### Step 5: Duration {#step-5-duration}
|
||||
> [!WARNING]
|
||||
> Deletion is irreversible.
|
||||
> RRULE: FREQ=WEEKLY;INTERVAL=2;BYDAY=MO
|
||||
|
||||
Specify how long each maintenance window lasts:
|
||||
## Related guides {#related-guides}
|
||||
|
||||
**Hours** (Required)
|
||||
|
||||
- 0-72 hours
|
||||
- Whole number
|
||||
|
||||
**Minutes** (Required)
|
||||
|
||||
- 0-59 minutes
|
||||
- Whole number
|
||||
- [Maintenance Events](/docs/v4/maintenances/events)
|
||||
- [Impact on Monitoring](/docs/v4/maintenances/impact-on-monitoring)
|
||||
- [RRULE Patterns](/docs/v4/maintenances/rrule-patterns)
|
||||
|
||||
**Total Duration:**
|
||||
The system calculates `duration_seconds = (hours × 3600) + (minutes × 60)`
|
||||
|
||||
@@ -179,52 +179,59 @@ Action: Override monitor statuses
|
||||
|
||||
```
|
||||
READY → ONGOING
|
||||
Condition: current_time >= start_time AND current_time < end_time
|
||||
Executed by: Status update scheduler (runs every minute)
|
||||
title: Maintenance Events
|
||||
description: How maintenance events are generated and shown to users
|
||||
```
|
||||
|
||||
#### COMPLETED {#completed-state}
|
||||
A maintenance event is one occurrence of a maintenance window.
|
||||
|
||||
**When:** Current time is past end time
|
||||
## Event generation {#event-generation}
|
||||
|
||||
**Meaning:**
|
||||
### One-time maintenance {#one-time-maintenance}
|
||||
|
||||
- Maintenance has finished
|
||||
- Creates one event.
|
||||
- Triggered when maintenance is created.
|
||||
- Monitor statuses restored to realtime values
|
||||
- Historical record
|
||||
|
||||
**Displayed As:** "Completed"
|
||||
### Recurring maintenance {#recurring-maintenance}
|
||||
|
||||
**Notification:** "Maintenance Completed" notification sent
|
||||
- Creates upcoming events from RRULE.
|
||||
- Scheduler refreshes upcoming occurrences.
|
||||
- Duplicate event start times are skipped.
|
||||
|
||||
**Example:**
|
||||
## Event statuses {#event-statuses}
|
||||
|
||||
```
|
||||
- `SCHEDULED`
|
||||
- `READY`
|
||||
- `ONGOING`
|
||||
- `COMPLETED`
|
||||
- `CANCELLED`
|
||||
|
||||
Status transitions are time-based and automatic.
|
||||
Current Time: May 15, 4:05 PM
|
||||
Event End: May 15, 4:00 PM
|
||||
|
||||
## User-visible behavior {#user-visible-behavior}
|
||||
|
||||
Status: COMPLETED (finished 5 minutes ago)
|
||||
Action: Restore monitor statuses
|
||||
```
|
||||
|
||||
- Ongoing events affect monitor display according to impact settings.
|
||||
- Visibility of upcoming and past events depends on site/page event display settings.
|
||||
|
||||
```
|
||||
## Manual actions {#manual-actions}
|
||||
**Automatic Transition:**
|
||||
|
||||
- You can cancel/delete events from maintenance management screens.
|
||||
- Edit the parent maintenance to regenerate its future events.
|
||||
```
|
||||
ONGOING → COMPLETED
|
||||
|
||||
## Related guides {#related-guides}
|
||||
|
||||
Condition: current_time >= end_time
|
||||
Executed by: Status update scheduler (runs every minute)
|
||||
```
|
||||
|
||||
#### CANCELLED {#cancelled-state}
|
||||
|
||||
**When:** Manually cancelled by user
|
||||
|
||||
**Meaning:**
|
||||
|
||||
- Maintenance was scheduled but won't happen
|
||||
- Monitor statuses not overridden
|
||||
- Marked explicitly as cancelled vs completed
|
||||
|
||||
**Displayed As:** "Cancelled"
|
||||
- [Creating and Managing Maintenances](/docs/v4/maintenances/creating-managing)
|
||||
- [Impact on Monitoring](/docs/v4/maintenances/impact-on-monitoring)
|
||||
- [RRULE Patterns](/docs/v4/maintenances/rrule-patterns)
|
||||
**Displayed As:** "Cancelled"
|
||||
|
||||
**Notification:** No automatic notification
|
||||
|
||||
|
||||
@@ -66,79 +66,63 @@ Displayed Status: MAINTENANCE
|
||||
|
||||
**Example:**
|
||||
|
||||
```
|
||||
Monitor: Database
|
||||
Realtime Status: UP (checks passing now)
|
||||
Incident: OPEN
|
||||
Monitor Impact: DOWN
|
||||
|
||||
Displayed Status: DOWN
|
||||
```
|
||||
|
||||
title: Impact on Monitoring
|
||||
description: How maintenance events affect displayed monitor status
|
||||
**Rationale:** Incident status takes precedence over current checks during active issues.
|
||||
|
||||
**Note:** Maintenance status takes precedence over incident status. If both exist, maintenance wins.
|
||||
During an **ONGOING** maintenance event, maintenance impact can override monitor status shown to users.
|
||||
|
||||
#### 3. Realtime Monitoring Data {#realtime-priority}
|
||||
## Status precedence {#status-precedence}
|
||||
|
||||
**When:** No maintenance or incident affecting this monitor
|
||||
Kener resolves status in this order (later overrides earlier):
|
||||
|
||||
**Display:** Show latest monitoring check result (UP, DOWN, DEGRADED)
|
||||
`default status → realtime monitor result → incident impact → maintenance impact`
|
||||
|
||||
**Example:**
|
||||
So maintenance has the highest effective priority when active.
|
||||
|
||||
## Impact values {#impact-values}
|
||||
|
||||
```
|
||||
Monitor: Web Server
|
||||
Latest Check: DOWN (connection timeout)
|
||||
No Maintenance: ✓
|
||||
No Incident: ✓
|
||||
Set per monitor in a maintenance:
|
||||
|
||||
Displayed Status: DOWN
|
||||
```
|
||||
- `MAINTENANCE` (recommended)
|
||||
- `DOWN`
|
||||
- `DEGRADED`
|
||||
- `UP`
|
||||
|
||||
#### 4. Default Monitor Status (Lowest Priority) {#default-priority}
|
||||
Choose the value that matches expected user impact during the window.
|
||||
|
||||
**When:** No monitoring data exists yet
|
||||
## Event lifecycle behavior {#event-lifecycle-behavior}
|
||||
|
||||
**Display:** Monitor's configured default status
|
||||
- `SCHEDULED` / `READY`: no override yet
|
||||
- `ONGOING`: override active
|
||||
- `COMPLETED` / `CANCELLED`: override removed
|
||||
|
||||
**Example:**
|
||||
## Realtime monitoring still runs {#realtime-monitoring-still-runs}
|
||||
|
||||
```
|
||||
Monitor: New Service
|
||||
No Checks Run: (just created)
|
||||
Even during maintenance, checks continue and data is recorded.
|
||||
Default Status: UP
|
||||
|
||||
Maintenance changes **displayed/effective** status, not monitor execution.
|
||||
Displayed Status: UP
|
||||
```
|
||||
|
||||
## Monitor Impact Levels {#impact-levels}
|
||||
## Practical guidance {#practical-guidance}
|
||||
|
||||
When configuring a maintenance, you specify the impact for each affected monitor:
|
||||
- Prefer `MAINTENANCE` for planned work communication.
|
||||
- Use `DOWN` only when service is expected to be unavailable.
|
||||
- Avoid overlapping maintenances on the same monitor.
|
||||
|
||||
### MAINTENANCE {#maintenance-impact}
|
||||
## Related guides {#related-guides}
|
||||
|
||||
**Visual:** Orange/yellow, wrench icon
|
||||
|
||||
**Meaning:** Service is under planned maintenance
|
||||
|
||||
**When to Use:**
|
||||
|
||||
- General maintenance work
|
||||
- Service available but under maintenance
|
||||
- Default/recommended choice
|
||||
|
||||
**User Interpretation:** "Service may be affected due to planned work"
|
||||
|
||||
**Example:**
|
||||
|
||||
```yaml
|
||||
Monitor: API Server
|
||||
Impact: MAINTENANCE
|
||||
During Event: Shows orange "Under Maintenance"
|
||||
```
|
||||
|
||||
### DOWN {#down-impact}
|
||||
- [Maintenances Overview](/docs/v4/maintenances/overview)
|
||||
- [Maintenance Events](/docs/v4/maintenances/events)
|
||||
- [Creating and Managing Maintenances](/docs/v4/maintenances/creating-managing)
|
||||
|
||||
**Visual:** Red, X icon
|
||||
|
||||
|
||||
@@ -90,92 +90,75 @@ Kener uses the industry-standard [iCalendar RRULE](http://www.kanzaki.com/docs/i
|
||||
FREQ=frequency;[INTERVAL=n;][BYDAY=days;][COUNT=n;]
|
||||
```
|
||||
|
||||
**Example Patterns:**
|
||||
|
||||
| Pattern | RRULE | Meaning |
|
||||
title: Maintenances Overview
|
||||
description: Plan and communicate scheduled service work with one-time or recurring maintenance windows
|
||||
| Pattern | RRULE | Meaning |
|
||||
| :---------------------- | :--------------------------------- | :-------------------- |
|
||||
| Every Sunday | `FREQ=WEEKLY;BYDAY=SU` | Weekly on Sunday |
|
||||
| Every 2 weeks on Monday | `FREQ=WEEKLY;INTERVAL=2;BYDAY=MO` | Bi-weekly on Monday |
|
||||
| Every day | `FREQ=DAILY` | Daily |
|
||||
| First of each month | `FREQ=MONTHLY;BYMONTHDAY=1` | Monthly on day 1 |
|
||||
| Weekdays only | `FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR` | Monday through Friday |
|
||||
Maintenances are planned service windows. Use them to communicate expected downtime or degradation before work starts.
|
||||
| Every 2 weeks on Monday | `FREQ=WEEKLY;INTERVAL=2;BYDAY=MO` | Bi-weekly on Monday |
|
||||
|
||||
Learn more in [RRULE Patterns](/docs/maintenances/rrule-patterns).
|
||||
## What a maintenance includes {#what-a-maintenance-includes}
|
||||
|
||||
## Maintenance Events {#maintenance-events}
|
||||
|
||||
When you create a maintenance, Kener automatically generates **maintenance events** based on the RRULE:
|
||||
| First of each month | `FREQ=MONTHLY;BYMONTHDAY=1` | Monthly on day 1 |
|
||||
Each maintenance has:
|
||||
|
||||
- Title and optional description
|
||||
- Start time
|
||||
- Duration
|
||||
- Schedule (`one-time` or recurring `RRULE`)
|
||||
- Affected monitors with impact
|
||||
- Status (`ACTIVE` / `INACTIVE`)
|
||||
- **One-Time Maintenances:** Generate 1 event at creation time
|
||||
- **Recurring Maintenances:** Generate events for the next 7 days, refreshed hourly
|
||||
|
||||
Each event represents a single occurrence of the maintenance window and tracks:
|
||||
## Maintenance vs incident {#maintenance-vs-incident}
|
||||
|
||||
- Start date/time
|
||||
- End date/time (calculated from duration)
|
||||
- Status (SCHEDULED → READY → ONGOING → COMPLETED)
|
||||
| Aspect | Maintenance | Incident |
|
||||
| ---------- | --------------------------- | ----------------------------- |
|
||||
| Nature | Planned | Unplanned |
|
||||
| Timing | Scheduled in advance | Created when issue occurs |
|
||||
| Recurrence | Can recur with RRULE | Typically one-off |
|
||||
| Purpose | Communicate expected impact | Communicate active disruption |
|
||||
|
||||
Events are covered in detail in [Maintenance Events](/docs/maintenances/events).
|
||||
|
||||
## Maintenance Status {#maintenance-status}
|
||||
## One-time vs recurring {#one-time-vs-recurring}
|
||||
|
||||
Maintenances have two levels of status:
|
||||
|
||||
- **One-time**: single event (`FREQ=MINUTELY;COUNT=1`)
|
||||
- **Recurring**: repeated events from RRULE (for example weekly/monthly patterns)
|
||||
|
||||
### Maintenance-Level Status {#maintenance-level-status}
|
||||
|
||||
## Monitor impact during maintenance {#monitor-impact-during-maintenance}
|
||||
|
||||
Controls whether the maintenance is active in the system:
|
||||
Set per-monitor impact for the maintenance window:
|
||||
|
||||
- **ACTIVE** - Maintenance is enabled and will generate events
|
||||
- **INACTIVE** - Maintenance is disabled and will not affect monitors
|
||||
- `MAINTENANCE` (recommended for planned work)
|
||||
- `DOWN`
|
||||
- `DEGRADED`
|
||||
- `UP` (rare)
|
||||
|
||||
**Note:** Changing a maintenance to INACTIVE does not cancel already scheduled events. You must manually cancel or delete those events.
|
||||
When an event is ongoing, this impact can override realtime status shown to users.
|
||||
|
||||
### Event-Level Status {#event-level-status}
|
||||
## Event lifecycle {#event-lifecycle}
|
||||
|
||||
Tracks the lifecycle of each individual maintenance occurrence:
|
||||
Each occurrence is a maintenance event that moves through statuses:
|
||||
|
||||
- **SCHEDULED** - Event created, more than 60 minutes away
|
||||
- `SCHEDULED`
|
||||
- `READY` (starting soon)
|
||||
- `ONGOING`
|
||||
- `COMPLETED`
|
||||
- `CANCELLED`
|
||||
- **READY** - Event starts within 60 minutes (notification sent)
|
||||
- **ONGOING** - Event is currently in progress
|
||||
|
||||
## Related guides {#related-guides}
|
||||
|
||||
- **COMPLETED** - Event has finished
|
||||
- **CANCELLED** - Event was manually cancelled
|
||||
|
||||
Events automatically transition through states based on the current time.
|
||||
|
||||
## Affected Monitors {#affected-monitors}
|
||||
|
||||
Each maintenance specifies which monitors are affected and their expected status during the maintenance window:
|
||||
|
||||
**Monitor Impact Options:**
|
||||
|
||||
- **MAINTENANCE** - Show as under maintenance (recommended)
|
||||
- **DOWN** - Show as completely unavailable
|
||||
- **DEGRADED** - Show as partially available
|
||||
- **UP** - Show as operational (rare, for non-disruptive maintenance)
|
||||
|
||||
When a maintenance event is ONGOING, the specified impact **overrides** the monitor's realtime status on the status page.
|
||||
|
||||
Learn more in [Maintenance Impact on Monitoring](/docs/maintenances/impact-on-monitoring).
|
||||
|
||||
## Public Visibility {#public-visibility}
|
||||
|
||||
Maintenances are visible to users on your public status page:
|
||||
|
||||
### Upcoming Maintenances {#upcoming-maintenances}
|
||||
|
||||
- Shown on the home page
|
||||
- Displays upcoming events (configurable days ahead)
|
||||
- Shows affected monitors and maintenance window
|
||||
|
||||
### Ongoing Maintenances {#ongoing-maintenances}
|
||||
|
||||
- Prominently displayed during the maintenance window
|
||||
- Affected monitors show maintenance status
|
||||
- Duration and progress indicators
|
||||
|
||||
### Past Maintenances {#past-maintenances}
|
||||
|
||||
- Listed on the events/history page
|
||||
- [Creating and Managing Maintenances](/docs/v4/maintenances/creating-managing)
|
||||
- [Maintenance Events](/docs/v4/maintenances/events)
|
||||
- [Impact on Monitoring](/docs/v4/maintenances/impact-on-monitoring)
|
||||
- [RRULE Patterns](/docs/v4/maintenances/rrule-patterns)
|
||||
- Shows completed maintenance events
|
||||
- Configurable retention period
|
||||
|
||||
|
||||
@@ -18,63 +18,71 @@ FREQ=frequency[;INTERVAL=n][;BYDAY=days][;BYMONTHDAY=day][;COUNT=n][;UNTIL=date]
|
||||
### Supported Components {#supported-components}
|
||||
|
||||
**FREQ** (Required)
|
||||
title: RRULE Patterns
|
||||
description: Common RRULE patterns for recurring maintenance schedules
|
||||
|
||||
- `DAILY` - Daily recurrence
|
||||
- `WEEKLY` - Weekly recurrence
|
||||
- `MONTHLY` - Monthly recurrence
|
||||
Kener uses iCalendar RRULE strings to schedule recurring maintenances.
|
||||
**INTERVAL** (Optional)
|
||||
|
||||
**INTERVAL** (Optional)
|
||||
## RRULE basics {#rrule-basics}
|
||||
|
||||
- Default: 1
|
||||
- Integer value: 2 = every other, 3 = every third, etc.
|
||||
Format:
|
||||
|
||||
**BYDAY** (Optional, for WEEKLY)
|
||||
|
||||
- Day codes: `MO`, `TU`, `WE`, `TH`, `FR`, `SA`, `SU`
|
||||
- Multiple days: `MO,WE,FR`
|
||||
```text
|
||||
FREQ=...;[INTERVAL=n];[BYDAY=...];[BYMONTHDAY=...]
|
||||
```
|
||||
|
||||
For one-time maintenances, Kener uses:
|
||||
**BYMONTHDAY** (Optional, for MONTHLY)
|
||||
|
||||
```text
|
||||
FREQ=MINUTELY;COUNT=1
|
||||
```
|
||||
|
||||
- Day of month: 1-31
|
||||
- Example: `BYMONTHDAY=1` (first of month)
|
||||
|
||||
**COUNT** (For one-time only)
|
||||
## Common patterns {#common-patterns}
|
||||
|
||||
| Use case | RRULE |
|
||||
| ----------------------- | ---------------------------------- |
|
||||
| Every day | `FREQ=DAILY` |
|
||||
| Every Sunday | `FREQ=WEEKLY;BYDAY=SU` |
|
||||
| Weekdays | `FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR` |
|
||||
| Every 2 weeks on Monday | `FREQ=WEEKLY;INTERVAL=2;BYDAY=MO` |
|
||||
| First day of each month | `FREQ=MONTHLY;BYMONTHDAY=1` |
|
||||
|
||||
- `COUNT=1` - Single occurrence
|
||||
- Used for one-time maintenances
|
||||
|
||||
## Day codes {#day-codes}
|
||||
|
||||
## Common Patterns {#common-patterns}
|
||||
|
||||
### Daily Patterns {#daily-patterns}
|
||||
- `MO` Monday
|
||||
- `TU` Tuesday
|
||||
- `WE` Wednesday
|
||||
- `TH` Thursday
|
||||
- `FR` Friday
|
||||
- `SA` Saturday
|
||||
- `SU` Sunday
|
||||
|
||||
#### Every Day {#every-day}
|
||||
## Tips {#tips}
|
||||
|
||||
**RRULE:** `FREQ=DAILY`
|
||||
- Use simple, readable patterns.
|
||||
- Verify next occurrences in the maintenance UI preview.
|
||||
- Avoid overlapping schedules for the same monitor.
|
||||
**Description:** Maintenance occurs every single day at the configured time
|
||||
|
||||
**Description:** Maintenance occurs every single day at the configured time
|
||||
## Related guides {#related-guides}
|
||||
|
||||
**Use Case:** Daily backup windows, nightly cleanup tasks
|
||||
|
||||
**Example:**
|
||||
|
||||
```
|
||||
Title: Nightly Database Backup
|
||||
Start: 2:00 AM (any day)
|
||||
RRULE: FREQ=DAILY
|
||||
Duration: 30 minutes
|
||||
|
||||
Occurrences:
|
||||
- Today at 2:00 AM
|
||||
- Tomorrow at 2:00 AM
|
||||
- Day after at 2:00 AM
|
||||
- Continues daily...
|
||||
```
|
||||
|
||||
#### Every Other Day {#every-other-day}
|
||||
|
||||
**RRULE:** `FREQ=DAILY;INTERVAL=2`
|
||||
|
||||
**Description:** Maintenance occurs every 2 days
|
||||
- [Creating and Managing Maintenances](/docs/v4/maintenances/creating-managing)
|
||||
- [Maintenance Events](/docs/v4/maintenances/events)
|
||||
- [Maintenances Overview](/docs/v4/maintenances/overview)
|
||||
**Description:** Maintenance occurs every 2 days
|
||||
|
||||
**Use Case:** Tasks that run every other day
|
||||
|
||||
|
||||
@@ -1,240 +1,68 @@
|
||||
---
|
||||
title: API Monitor
|
||||
description: Monitor HTTP/HTTPS endpoints with custom methods, headers, and advanced evaluation logic
|
||||
description: Monitor HTTP/HTTPS endpoints with custom methods, payloads, and eval logic
|
||||
---
|
||||
|
||||
API monitors allow you to track the uptime, latency, and correctness of your HTTP/HTTPS endpoints. Kener provides flexible options to configure the request including custom headers, request bodies, authentication, and JavaScript-based evaluation logic to determine status.
|
||||
API monitors send HTTP requests and evaluate the response with JavaScript.
|
||||
|
||||
## How API Monitoring Works {#how-api-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's API monitoring follows this workflow:
|
||||
1. Set `url`
|
||||
2. Choose `method` (default `GET`)
|
||||
3. Set `timeout` (default `10000` ms)
|
||||
4. Save (optional: custom `eval` function)
|
||||
|
||||
1. **Build Request**: Kener constructs an HTTP request with the configured method, headers, body, and timeout.
|
||||
2. **Environment Variables**: Any `$VARIABLE_NAME` placeholders in URL, headers, or body are replaced with environment variable values.
|
||||
3. **Execute Request**: The request is sent using Axios with configurable SSL settings.
|
||||
4. **Measure Latency**: Total time from request start to response completion is recorded.
|
||||
5. **Evaluate Response**: The custom eval function receives the status code, latency, and response body to determine monitor status.
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
| Field | Type | Default | Notes |
|
||||
| :-------------------- | :--------------------------------------------- | :--------------- | :---------------------------- |
|
||||
| `url` | `string` | — | Required |
|
||||
| `method` | `GET\|POST\|PUT\|PATCH\|DELETE\|HEAD\|OPTIONS` | `GET` | |
|
||||
| `headers` | `{ key, value }[]` | `[]` | Optional custom headers |
|
||||
| `body` | `string` | `""` | Sent for non-GET/HEAD methods |
|
||||
| `timeout` | `number` | `10000` | Request timeout in ms |
|
||||
| `allowSelfSignedCert` | `boolean` | `false` | Disables TLS verify when true |
|
||||
| `eval` | `string` (JS function) | built-in default | Receives response details |
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :-------------------------- | :--------- | :----------------------------------------------------------------------------------------------- | :------------ |
|
||||
| **URL** | `string` | The fully qualified URL to monitor (e.g., `https://api.example.com/health`). | (Required) |
|
||||
| **Method** | `string` | HTTP method to use. Options: `GET`, `POST`, `PUT`, `PATCH`, `DELETE`, `HEAD`, `OPTIONS`. | `GET` |
|
||||
| **Timeout** | `number` | Maximum time in milliseconds to wait for a response before considering it a failure. | `10000` (10s) |
|
||||
| **Headers** | `array` | List of custom HTTP headers (Key-Value pairs). Useful for authentication or content negotiation. | `[]` |
|
||||
| **Body** | `string` | The raw request body to send. Required for methods like `POST` or `PUT`. Usually a JSON string. | `""` |
|
||||
| **Allow Self-Signed Certs** | `boolean` | If enabled, the monitor will ignore SSL certificate errors (e.g., self-signed or expired certs). | `false` |
|
||||
| **Custom Eval** | `function` | A JavaScript snippet to manually validate the response and determine the status. | Default logic |
|
||||
## Default eval behavior {#default-eval}
|
||||
|
||||
## Custom Evaluation {#custom-evaluation}
|
||||
Built-in eval marks the monitor **UP** when:
|
||||
|
||||
By default, Kener considers a monitor **UP** if the HTTP status code is **2xx** (200-299). You can override this behavior using the **Custom Eval** field. This is powerful for checking specific JSON properties in the response or setting thresholds for latency.
|
||||
- status code is `429`, or
|
||||
- status code is in `2xx` or `3xx`
|
||||
|
||||
The evaluation function runs in a sandboxed environment and receives the following arguments:
|
||||
Otherwise it returns **DOWN**.
|
||||
|
||||
- `statusCode` (`number`): The HTTP status code returned by the server.
|
||||
- `responseTime` (`number`): Time taken for the request in milliseconds.
|
||||
- `responseRaw` (`string`): The raw response body (text).
|
||||
- `modules` (`object`): Available helper modules. Currently supports [`cheerio`](https://cheerio.js.org/) for HTML parsing.
|
||||
## Custom eval contract {#custom-eval-contract}
|
||||
|
||||
**Return Value:**
|
||||
The function can be **synchronous** or **asynchronous** (using `async` or returning a `Promise`). It **must** return (or resolve to) an object with:
|
||||
Your function receives:
|
||||
|
||||
- `status`: `'UP'`, `'DEGRADED'`, `'DOWN'` or `'MAINTENANCE'`.
|
||||
- `latency`: The latency to record (typically just `responseTime`).
|
||||
- `statusCode`
|
||||
- `responseTime`
|
||||
- `responseRaw`
|
||||
- `modules` (currently includes `cheerio`)
|
||||
|
||||
### Default Implementation {#default-implementation}
|
||||
|
||||
Here is the default logic used if you don't provide a custom function:
|
||||
It must return:
|
||||
|
||||
```javascript
|
||||
;(statusCode, responseTime, responseRaw, modules) => {
|
||||
let status = "DOWN"
|
||||
// Success Range
|
||||
if (statusCode >= 200 && statusCode <= 299) {
|
||||
status = "UP"
|
||||
}
|
||||
// Rate Limited (429) is often considered degraded
|
||||
if (statusCode == 429) {
|
||||
status = "DEGRADED"
|
||||
}
|
||||
return {
|
||||
status: status,
|
||||
latency: responseTime
|
||||
}
|
||||
}
|
||||
{ status: "UP" | "DEGRADED" | "DOWN" | "MAINTENANCE", latency: number }
|
||||
```
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic GET Request {#basic-get-request}
|
||||
|
||||
A simple health check for a public URL.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "api-website",
|
||||
"name": "Homepage",
|
||||
"type": "API",
|
||||
"url": "https://kener.ing",
|
||||
"method": "GET",
|
||||
"timeout": 5000
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Authenticated POST with JSON {#authenticated-post-with-json}
|
||||
|
||||
Sending a login request with a Bearer token and JSON body.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "api-auth-check",
|
||||
"name": "Auth Service",
|
||||
"type": "API",
|
||||
"url": "https://api.example.com/v1/login",
|
||||
"method": "POST",
|
||||
"headers": [
|
||||
{ "key": "Content-Type", "value": "application/json" },
|
||||
{ "key": "Authorization", "value": "Bearer YOUR_SECRET_TOKEN" }
|
||||
],
|
||||
"body": "{\"username\": \"monitor_user\", \"password\": \"secure_password\"}",
|
||||
"timeout": 10000
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Advanced Validation (Business Logic) {#advanced-validation-business-logic}
|
||||
|
||||
In this example, we mark the monitor as **DOWN** if the JSON response is missing `success: true`, and **DEGRADED** if it takes longer than 500ms, even if the status code is 200.
|
||||
|
||||
**Configuration:**
|
||||
|
||||
- **URL**: `https://api.example.com/status`
|
||||
- **Method**: `GET`
|
||||
- **Custom Eval**:
|
||||
|
||||
```javascript
|
||||
;(statusCode, responseTime, responseRaw, modules) => {
|
||||
let status = "UP"
|
||||
let body = {}
|
||||
|
||||
try {
|
||||
body = JSON.parse(responseRaw)
|
||||
} catch (e) {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
}
|
||||
|
||||
// Business Logic: 'success' must be true
|
||||
if (statusCode !== 200 || body.success !== true) {
|
||||
status = "DOWN"
|
||||
}
|
||||
// Performance Logic: Latency > 500ms is degraded
|
||||
else if (responseTime > 500) {
|
||||
status = "DEGRADED"
|
||||
}
|
||||
|
||||
return {
|
||||
status: status,
|
||||
latency: responseTime
|
||||
"type_data": {
|
||||
"url": "https://api.example.com/health",
|
||||
"method": "GET",
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. HTML Parsing with Cheerio {#html-parsing-with-cheerio}
|
||||
|
||||
You can use the built-in `cheerio` module to parse HTML responses and check for specific elements or text.
|
||||
|
||||
```javascript
|
||||
;async (statusCode, responseTime, responseRaw, modules) => {
|
||||
let html = responseRaw
|
||||
const $ = modules.cheerio.load(html)
|
||||
// Find all component containers
|
||||
const components = $(".components-section .components-container .component-container")
|
||||
|
||||
let status = true
|
||||
|
||||
// Iterate through components to check their status
|
||||
components.each((index, element) => {
|
||||
// const name = $(element).find(".component-name").text().trim();
|
||||
const statusText = $(element).find(".component-status").text().trim()
|
||||
|
||||
// Fail if any component is not 'Operational'
|
||||
if (statusText !== "Operational") {
|
||||
status = false
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
status: status ? "UP" : "DOWN",
|
||||
latency: responseTime
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> The above example uses an `async` function wrapper. While this particular example doesn't require async operations, the function can be async if you need to perform asynchronous operations within your evaluation logic.
|
||||
|
||||
## Using Environment Variables {#using-environment-variables}
|
||||
|
||||
Kener supports environment variable substitution in URLs, headers, and body content:
|
||||
|
||||
```
|
||||
https://api.example.com/v1/status?apikey=$API_KEY
|
||||
```
|
||||
|
||||
Environment variables are replaced at runtime:
|
||||
|
||||
- `$API_KEY` → value of `process.env.API_KEY`
|
||||
|
||||
This keeps sensitive credentials out of your configuration.
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### URL Configuration {#best-practices-url}
|
||||
|
||||
1. **Use dedicated health endpoints**: `/health`, `/status`, or `/ping` endpoints are lightweight.
|
||||
2. **Avoid authentication on health checks**: If possible, use endpoints that don't require auth.
|
||||
3. **Use HTTPS**: Always prefer secure connections for production monitoring.
|
||||
|
||||
### Timeout Configuration {#best-practices-timeout}
|
||||
|
||||
| Scenario | Recommended Timeout | Rationale |
|
||||
| :--------------------- | :------------------ | :------------------------- |
|
||||
| Health check endpoints | 5000ms | Should be fast |
|
||||
| API endpoints | 10000ms | Standard API response time |
|
||||
| Heavy processing | 30000ms | Reports, analytics, etc. |
|
||||
| External third-party | 15000ms | Network variability |
|
||||
|
||||
### Evaluation Logic {#best-practices-evaluation}
|
||||
|
||||
1. **Always return both status and latency**: The evaluation must return both fields.
|
||||
2. **Handle JSON parse errors**: Wrap `JSON.parse()` in try-catch.
|
||||
3. **Check for empty responses**: Empty body might indicate a problem.
|
||||
4. **Use DEGRADED appropriately**: For slow but functional services.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :---------------------- | :------------------------------------- | :------------------------------- |
|
||||
| Always DOWN | URL unreachable or wrong | Verify URL is accessible |
|
||||
| SSL errors | Self-signed or expired certificate | Enable "Allow Self-Signed Certs" |
|
||||
| Timeout errors | Server too slow or network issues | Increase timeout value |
|
||||
| Authentication failures | Wrong credentials or token expired | Check environment variables |
|
||||
| Eval errors | JavaScript syntax error in custom eval | Test eval function separately |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Test with curl**:
|
||||
|
||||
```bash
|
||||
curl -v -X GET "https://api.example.com/health" \
|
||||
-H "Authorization: Bearer $TOKEN"
|
||||
```
|
||||
|
||||
2. **Check response format**: Ensure you're parsing the response correctly (JSON vs HTML).
|
||||
|
||||
3. **Verify environment variables**: Ensure all `$VARIABLE` references are set.
|
||||
|
||||
4. **Test eval function**: Test your custom eval with sample data before deploying.
|
||||
- **Always DOWN**: verify URL/method/headers/body
|
||||
- **TLS errors**: enable `allowSelfSignedCert` only for trusted self-signed endpoints
|
||||
- **Eval errors**: simplify eval and validate return shape
|
||||
|
||||
@@ -1,157 +1,51 @@
|
||||
---
|
||||
title: DNS Monitor
|
||||
description: Monitor DNS record resolution and verify that your domains resolve to expected values
|
||||
description: Validate DNS records against expected values
|
||||
---
|
||||
|
||||
DNS monitors verify that your domain's DNS records resolve correctly by querying a specified name server and comparing the results against expected values. This is essential for detecting DNS misconfigurations, hijacking attempts, propagation issues, or CDN/load balancer changes.
|
||||
DNS monitors query records for a host and compare returned values to your expected values.
|
||||
|
||||
## How DNS Monitoring Works {#how-dns-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's DNS monitoring follows this workflow:
|
||||
Configure:
|
||||
|
||||
1. **Query DNS Server**: Kener sends a DNS query to the configured name server (e.g., `8.8.8.8`) for a specific record type.
|
||||
2. **Receive Response**: The DNS server returns all records matching the query (domain + record type).
|
||||
3. **Extract Data**: Kener extracts the relevant data from each DNS answer based on the record type.
|
||||
4. **Compare Values**: The returned values are compared against your expected values using the configured match type.
|
||||
5. **Determine Status**: Based on whether the expected values are found, the monitor is marked as UP or DOWN.
|
||||
- `host`
|
||||
- `lookupRecord` (default `A`)
|
||||
- `matchType` (`ANY` or `ALL`, default `ANY`)
|
||||
- at least one expected value in `values`
|
||||
|
||||
### DNS Query Process {#dns-query-process}
|
||||
`nameServer` is optional (leave blank for resolver defaults).
|
||||
|
||||
```
|
||||
┌─────────────┐ DNS Query ┌─────────────┐
|
||||
│ Kener │ ──────────────────── │ Name Server │
|
||||
│ Monitor │ (UDP Port 53) │ (8.8.8.8) │
|
||||
└─────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ ┌────────────────────────┐ │
|
||||
└────│ Query: example.com A │──────┘
|
||||
└────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Response: │
|
||||
│ - 93.184.216.34 │
|
||||
│ - TTL: 3600 │
|
||||
└────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Compare with expected │
|
||||
│ values using match type│
|
||||
└────────────────────────┘
|
||||
```
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
| Field | Type | Default | Notes |
|
||||
| :------------- | :--------- | :------ | :-------------------------- |
|
||||
| `host` | `string` | — | Required |
|
||||
| `nameServer` | `string` | `""` | Optional override |
|
||||
| `lookupRecord` | `string` | `A` | Required |
|
||||
| `matchType` | `ANY\|ALL` | `ANY` | Required |
|
||||
| `values` | `string[]` | `[]` | Required (non-empty values) |
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :---------------- | :------- | :------------------------------------------------------------------ | :--------- |
|
||||
| **Host** | `string` | The domain name to query (e.g., `example.com`). | (Required) |
|
||||
| **Name Server** | `string` | The DNS server to query. Can be any public or private DNS resolver. | `8.8.8.8` |
|
||||
| **Lookup Record** | `string` | The DNS record type to query (A, AAAA, CNAME, MX, TXT, etc.). | `A` |
|
||||
| **Match Type** | `string` | How to compare expected values: `ALL` or `ANY`. | `ANY` |
|
||||
| **Values** | `array` | List of expected values that the DNS response should contain. | (Required) |
|
||||
## Match behavior {#match-behavior}
|
||||
|
||||
### Match Type Behavior {#match-type-behavior}
|
||||
- `ANY`: monitor is **UP** when at least one expected value is present
|
||||
- `ALL`: monitor is **UP** only when all expected values are present
|
||||
|
||||
| Match Type | Behavior | Status UP When |
|
||||
| :--------- | :---------------------------------------------------------- | :--------------------------- |
|
||||
| **ANY** | At least one expected value must be present in the response | Any expected value matches |
|
||||
| **ALL** | All expected values must be present in the response | Every expected value matches |
|
||||
## Normalization rules {#normalization-rules}
|
||||
|
||||
## Supported Record Types {#supported-record-types}
|
||||
Before comparison, values are normalized by runtime logic:
|
||||
|
||||
Kener supports a comprehensive set of DNS record types. Here are the most commonly used:
|
||||
- lowercased
|
||||
- trailing `.` removed
|
||||
- trimmed whitespace
|
||||
|
||||
### Common Record Types {#common-record-types}
|
||||
|
||||
| Record | Description | Example Data |
|
||||
| :-------- | :------------------------------------ | :------------------------------------ |
|
||||
| **A** | IPv4 address | `93.184.216.34` |
|
||||
| **AAAA** | IPv6 address | `2606:2800:220:1:248:1893:25c8:1946` |
|
||||
| **CNAME** | Canonical name (alias) | `www.example.com` |
|
||||
| **MX** | Mail exchange server | `mail.example.com` (with priority) |
|
||||
| **TXT** | Text record (SPF, DKIM, verification) | `v=spf1 include:_spf.google.com ~all` |
|
||||
| **NS** | Name server | `ns1.example.com` |
|
||||
| **SOA** | Start of authority | Primary NS, admin email, serial, etc. |
|
||||
| **PTR** | Pointer record (reverse DNS) | `hostname.example.com` |
|
||||
| **SRV** | Service location | `_sip._tcp.example.com` |
|
||||
|
||||
### Additional Supported Types {#additional-record-types}
|
||||
|
||||
Kener also supports these record types: `MD`, `MF`, `MB`, `MG`, `MR`, `NULL`, `WKS`, `HINFO`, `MINFO`, `RP`, `AFSDB`, `X25`, `ISDN`, `RT`, `NSAP`, `NSAP_PTR`, `SIG`, `KEY`, `PX`, `GPOS`, `LOC`, `NXT`, `EID`, `NIMLOC`, `ATMA`, `NAPTR`, `KX`, `CERT`, `A6`, `DNAME`, `SINK`, `OPT`, `APL`, `DS`, `SSHFP`, `IPSECKEY`, `RRSIG`, `NSEC`, `DNSKEY`, `DHCID`, `NSEC3`, `NSEC3PARAM`, `TLSA`, `SMIMEA`, `HIP`, `NINFO`, `RKEY`, `TALINK`, `CDS`, `CDNSKEY`, `OPENPGPKEY`, `CSYNC`, `SPF`, `UINFO`, `UID`, `GID`, `UNSPEC`, `NID`, `L32`, `L64`, `LP`, `EUI48`, `EUI64`, `TKEY`, `TSIG`, `IXFR`, `AXFR`, `MAILB`, `MAILA`, `ANY`.
|
||||
|
||||
## DNS Response Structure {#dns-response-structure}
|
||||
|
||||
When Kener queries a DNS record, it receives answers with this structure:
|
||||
|
||||
```javascript
|
||||
{
|
||||
name: "example.com", // Domain queried
|
||||
type: "A", // Record type
|
||||
ttl: 3600, // Time-to-live in seconds
|
||||
data: "93.184.216.34" // The actual record value
|
||||
}
|
||||
```
|
||||
|
||||
### Data Extraction by Record Type {#data-extraction}
|
||||
|
||||
Different record types return data in different formats:
|
||||
|
||||
| Record Type | Data Returned |
|
||||
| :-------------- | :------------------------------------ |
|
||||
| **A**, **AAAA** | IP address as string |
|
||||
| **NS** | Name server hostname |
|
||||
| **CNAME** | Canonical domain name |
|
||||
| **MX** | Object with `exchange` and `priority` |
|
||||
| **TXT** | Text content |
|
||||
| **Others** | Raw data from DNS response |
|
||||
|
||||
## Evaluation Logic {#evaluation-logic}
|
||||
|
||||
Unlike other monitor types, DNS monitors use a fixed evaluation logic based on the **Match Type** setting:
|
||||
|
||||
### ANY Match Logic {#any-match-logic}
|
||||
|
||||
```javascript
|
||||
// Pseudocode for ANY match
|
||||
for (let expectedValue of expectedValues) {
|
||||
if (dnsResponse.includes(expectedValue)) {
|
||||
return { status: "UP", latency: queryTime }
|
||||
}
|
||||
}
|
||||
return { status: "DOWN", latency: queryTime }
|
||||
```
|
||||
|
||||
**Use Case**: When your domain can resolve to multiple IPs (CDN, load balancer) and any one is acceptable.
|
||||
|
||||
### ALL Match Logic {#all-match-logic}
|
||||
|
||||
```javascript
|
||||
// Pseudocode for ALL match
|
||||
for (let expectedValue of expectedValues) {
|
||||
if (!dnsResponse.includes(expectedValue)) {
|
||||
return { status: "DOWN", latency: queryTime }
|
||||
}
|
||||
}
|
||||
return { status: "UP", latency: queryTime }
|
||||
```
|
||||
|
||||
**Use Case**: When you need to verify that specific records exist, such as ensuring all required TXT records for email authentication are present.
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic A Record Check {#basic-a-record-check}
|
||||
|
||||
Verify that a domain resolves to a specific IP address.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "website-dns",
|
||||
"name": "Website DNS",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "A",
|
||||
"matchType": "ANY",
|
||||
"values": ["93.184.216.34"]
|
||||
@@ -159,333 +53,8 @@ Verify that a domain resolves to a specific IP address.
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Multiple IP Addresses (CDN/Load Balancer) {#multiple-ip-addresses}
|
||||
|
||||
Monitor a domain that may resolve to any of several IPs.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "cdn-dns",
|
||||
"name": "CDN DNS Resolution",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "cdn.example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "A",
|
||||
"matchType": "ANY",
|
||||
"values": ["104.16.123.96", "104.16.124.96", "104.16.125.96"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. IPv6 (AAAA) Record Check {#ipv6-record-check}
|
||||
|
||||
Verify IPv6 resolution for your domain.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "ipv6-dns",
|
||||
"name": "IPv6 DNS",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "AAAA",
|
||||
"matchType": "ANY",
|
||||
"values": ["2606:2800:220:1:248:1893:25c8:1946"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. CNAME Record Verification {#cname-record-verification}
|
||||
|
||||
Ensure a subdomain points to the correct canonical name.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "www-cname",
|
||||
"name": "WWW CNAME Record",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "www.example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "CNAME",
|
||||
"matchType": "ANY",
|
||||
"values": ["example.com"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. MX Record Monitoring {#mx-record-monitoring}
|
||||
|
||||
Verify mail server configuration. Note: MX records return the exchange server hostname.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mail-dns",
|
||||
"name": "Email MX Records",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "MX",
|
||||
"matchType": "ANY",
|
||||
"values": ["mail.example.com"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. TXT Record for SPF Verification {#txt-spf-verification}
|
||||
|
||||
Monitor that your SPF record is correctly configured.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "spf-record",
|
||||
"name": "SPF Record",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "TXT",
|
||||
"matchType": "ANY",
|
||||
"values": ["v=spf1 include:_spf.google.com ~all"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7. Domain Verification TXT Record {#domain-verification-txt}
|
||||
|
||||
Ensure domain verification records are present (Google, Microsoft, etc.).
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "google-verification",
|
||||
"name": "Google Site Verification",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "TXT",
|
||||
"matchType": "ANY",
|
||||
"values": ["google-site-verification=abc123xyz"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. NS Record Monitoring {#ns-record-monitoring}
|
||||
|
||||
Verify that your domain is using the correct name servers.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "nameservers",
|
||||
"name": "Name Servers",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "NS",
|
||||
"matchType": "ALL",
|
||||
"values": ["ns1.example.com", "ns2.example.com"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 9. Multiple Required TXT Records {#multiple-txt-records}
|
||||
|
||||
Ensure all required TXT records exist (SPF, DKIM, DMARC verification).
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "email-auth-records",
|
||||
"name": "Email Authentication DNS",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "TXT",
|
||||
"matchType": "ALL",
|
||||
"values": ["v=spf1 include:_spf.google.com ~all", "google-site-verification=abc123"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 10. Using Custom/Private DNS Server {#custom-dns-server}
|
||||
|
||||
Query an internal DNS server for private domain resolution.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "internal-dns",
|
||||
"name": "Internal Service DNS",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "api.internal.company.com",
|
||||
"nameServer": "10.0.0.53",
|
||||
"lookupRecord": "A",
|
||||
"matchType": "ANY",
|
||||
"values": ["10.0.1.100"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 11. Cloudflare DNS Check {#cloudflare-dns-check}
|
||||
|
||||
Verify your domain is proxied through Cloudflare.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "cloudflare-proxy",
|
||||
"name": "Cloudflare Proxy",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
"nameServer": "1.1.1.1",
|
||||
"lookupRecord": "A",
|
||||
"matchType": "ANY",
|
||||
"values": ["104.21.x.x", "172.67.x.x"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 12. SRV Record for Services {#srv-record-services}
|
||||
|
||||
Monitor SRV records used for service discovery.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "sip-srv",
|
||||
"name": "SIP Service Record",
|
||||
"type": "DNS",
|
||||
"type_data": {
|
||||
"host": "_sip._tcp.example.com",
|
||||
"nameServer": "8.8.8.8",
|
||||
"lookupRecord": "SRV",
|
||||
"matchType": "ANY",
|
||||
"values": ["sip.example.com"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Common Use Cases {#common-use-cases}
|
||||
|
||||
### Website & Application Monitoring {#use-case-websites}
|
||||
|
||||
| Scenario | Record Type | Match Type | Purpose |
|
||||
| :----------------------- | :---------- | :--------- | :------------------------------------------ |
|
||||
| Single IP website | A | ANY | Verify domain resolves to expected IP |
|
||||
| Multi-region/CDN website | A | ANY | Confirm resolution to any valid edge server |
|
||||
| IPv6-enabled service | AAAA | ANY | Monitor IPv6 accessibility |
|
||||
| Subdomain alias | CNAME | ANY | Verify subdomain points correctly |
|
||||
|
||||
### Email Infrastructure {#use-case-email}
|
||||
|
||||
| Scenario | Record Type | Match Type | Purpose |
|
||||
| :------------ | :---------- | :--------- | :---------------------------------- |
|
||||
| Mail server | MX | ANY | Ensure mail routing is correct |
|
||||
| SPF record | TXT | ANY | Verify sender authentication |
|
||||
| DKIM selector | TXT | ANY | Confirm email signing is configured |
|
||||
| DMARC policy | TXT | ANY | Monitor email policy record |
|
||||
|
||||
### Infrastructure & Security {#use-case-infrastructure}
|
||||
|
||||
| Scenario | Record Type | Match Type | Purpose |
|
||||
| :--------------------- | :---------- | :--------- | :----------------------------------------- |
|
||||
| Name server delegation | NS | ALL | Verify all NS records are present |
|
||||
| Domain verification | TXT | ANY | Confirm third-party verifications |
|
||||
| CAA records | CAA | ANY | Monitor certificate authority restrictions |
|
||||
| Reverse DNS | PTR | ANY | Verify reverse lookup configuration |
|
||||
|
||||
## Popular DNS Servers {#popular-dns-servers}
|
||||
|
||||
You can use any DNS server for monitoring. Here are some popular public options:
|
||||
|
||||
| Provider | Primary DNS | Secondary DNS | Notes |
|
||||
| :--------------- | :--------------- | :---------------- | :------------------------------- |
|
||||
| Google | `8.8.8.8` | `8.8.4.4` | Reliable, global coverage |
|
||||
| Cloudflare | `1.1.1.1` | `1.0.0.1` | Fast, privacy-focused |
|
||||
| Quad9 | `9.9.9.9` | `149.112.112.112` | Security-focused, blocks malware |
|
||||
| OpenDNS | `208.67.222.222` | `208.67.220.220` | Cisco-owned, filtering options |
|
||||
| Authoritative NS | (varies) | - | Direct query to domain's NS |
|
||||
|
||||
### Choosing a Name Server {#choosing-nameserver}
|
||||
|
||||
- **Public DNS (8.8.8.8, 1.1.1.1)**: Good for general monitoring; may have caching delays.
|
||||
- **Authoritative NS**: Query the domain's own name servers for the most accurate results.
|
||||
- **Private DNS**: Use internal DNS for monitoring private/internal domains.
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Record Type Selection {#best-practices-record-type}
|
||||
|
||||
1. **Monitor the record type you depend on**: If your app uses IPv6, monitor AAAA records.
|
||||
2. **Include email records**: Monitor MX, SPF, DKIM for email-dependent services.
|
||||
3. **Check CNAME chains**: If you use CNAME records, monitor them separately.
|
||||
|
||||
### Value Configuration {#best-practices-values}
|
||||
|
||||
1. **Use exact values**: DNS comparisons are exact string matches.
|
||||
2. **Account for multiple records**: CDNs and load balancers often return multiple IPs.
|
||||
3. **Update after changes**: Remember to update expected values after DNS changes.
|
||||
|
||||
### Match Type Selection {#best-practices-match-type}
|
||||
|
||||
1. **ANY for redundant systems**: Use when any one of several IPs is acceptable.
|
||||
2. **ALL for critical records**: Use when all specified records must exist.
|
||||
3. **Start with ANY**: It's more forgiving for dynamic DNS environments.
|
||||
|
||||
### Name Server Selection {#best-practices-nameserver}
|
||||
|
||||
1. **Use multiple monitors**: Monitor from different DNS servers for comprehensive coverage.
|
||||
2. **Consider caching**: Public DNS servers cache results; authoritative NS gives real-time data.
|
||||
3. **Match your users**: Use the same DNS servers your users likely use.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :------------------------------ | :---------------------------------------- | :--------------------------------------------- |
|
||||
| Always DOWN with correct values | Exact value mismatch (whitespace, case) | Copy exact value from DNS lookup tool |
|
||||
| Intermittent failures | DNS propagation in progress | Wait for propagation or query authoritative NS |
|
||||
| Timeout errors | Name server unreachable | Check network connectivity, try different NS |
|
||||
| No records returned | Wrong record type or domain doesn't exist | Verify record type and domain spelling |
|
||||
| MX record mismatch | Expecting IP but MX returns hostname | Use the mail server hostname, not IP |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Verify with dig/nslookup**:
|
||||
|
||||
```bash
|
||||
dig @8.8.8.8 example.com A
|
||||
dig @8.8.8.8 example.com TXT
|
||||
nslookup -type=MX example.com 8.8.8.8
|
||||
```
|
||||
|
||||
2. **Check exact response values**: DNS responses must match exactly. Use quotes around TXT records.
|
||||
|
||||
3. **Query authoritative NS**: Find and query the authoritative name server directly:
|
||||
|
||||
```bash
|
||||
dig NS example.com
|
||||
dig @ns1.example.com example.com A
|
||||
```
|
||||
|
||||
4. **Monitor DNS propagation**: Use tools like [dnschecker.org](https://dnschecker.org) to verify global propagation.
|
||||
|
||||
### Latency Considerations {#latency-considerations}
|
||||
|
||||
DNS query latency depends on:
|
||||
|
||||
- **Distance to name server**: Closer servers respond faster.
|
||||
- **DNS server load**: Public DNS servers may have variable response times.
|
||||
- **Record complexity**: Some record types require additional lookups.
|
||||
- **Network conditions**: UDP packets may be delayed or lost.
|
||||
|
||||
Typical DNS query times:
|
||||
|
||||
- Local/cached: 1-10ms
|
||||
- Same region: 10-50ms
|
||||
- Cross-region: 50-200ms
|
||||
- Cross-continent: 100-300ms
|
||||
- **Unexpected DOWN**: copy exact record output (after normalization rules)
|
||||
- **No response**: check `lookupRecord` type and resolver reachability
|
||||
- **Partial mismatches**: use `ANY` for multi-value dynamic DNS setups
|
||||
|
||||
@@ -1,464 +1,76 @@
|
||||
---
|
||||
title: GameDig Monitor
|
||||
description: Monitor game servers for over 300 games using the GameDig protocol
|
||||
description: Monitor game server availability using the GameDig query protocol
|
||||
---
|
||||
|
||||
GameDig monitors allow you to track the health and status of game servers. Using the [GameDig](https://github.com/gamedig/node-gamedig) library, Kener can query game servers for over 300 different games, including popular titles like Minecraft, Counter-Strike, ARK, Valheim, and many more.
|
||||
GameDig monitors query game servers and evaluate the response.
|
||||
|
||||
## How GameDig Monitoring Works {#how-gamedig-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's GameDig monitoring follows this workflow:
|
||||
Set:
|
||||
|
||||
1. **Query Server**: Kener sends a query to the game server using the appropriate game protocol.
|
||||
2. **Receive Response**: The server responds with status information (players, map, ping, etc.).
|
||||
3. **Measure Latency**: Response time is recorded as the server's ping.
|
||||
4. **Evaluate Status**: The custom eval function receives the response data to determine monitor status.
|
||||
- `gameId`
|
||||
- `host`
|
||||
- `port`
|
||||
- `timeout` (default `10000` ms)
|
||||
|
||||
### GameDig Check Process {#gamedig-check-process}
|
||||
Optional flags:
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐
|
||||
│ Kener │ ──── Game Protocol ───► │ Game │
|
||||
│ Monitor │ (UDP/TCP Query) │ Server │
|
||||
└─────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ ┌─────────────────────────────┐ │
|
||||
└────│ Query with: │────┘
|
||||
│ - Game type (minecraft) │
|
||||
│ - Host (mc.example.com) │
|
||||
│ - Port (25565) │
|
||||
│ - Timeout (10s) │
|
||||
└─────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────┐
|
||||
│ Response: │
|
||||
│ - ping: 45ms │
|
||||
│ - players: 12/20 │
|
||||
│ - map: "world" │
|
||||
│ - raw: { ... } │
|
||||
└─────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────┐
|
||||
│ Custom Eval Function │
|
||||
│ (responseTime, responseRaw) │
|
||||
└─────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────┐
|
||||
│ Result: { status, latency } │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
- `guessPort` (default `false`)
|
||||
- `requestRules` (default `false`)
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :---------------- | :--------- | :----------------------------------------------------------------------- | :------------ |
|
||||
| **Game** | `string` | The game type identifier from the supported games list. | (Required) |
|
||||
| **Host** | `string` | Hostname or IP address of the game server. | (Required) |
|
||||
| **Port** | `number` | Query port of the game server (may differ from game port). | Game default |
|
||||
| **Timeout** | `number` | Maximum time in milliseconds to wait for a response. | `10000` |
|
||||
| **Guess Port** | `boolean` | Try alternative ports if the specified port doesn't respond. | `false` |
|
||||
| **Request Rules** | `boolean` | Request additional "rules" data from Valve games (may increase latency). | `false` |
|
||||
| **Custom Eval** | `function` | JavaScript function to evaluate response and determine status. | Default logic |
|
||||
| Field | Type | Default | Notes |
|
||||
| :------------- | :--------------------- | :----------------- | :----------------------------------- |
|
||||
| `gameId` | `string` | first game in list | Required |
|
||||
| `host` | `string` | — | Required |
|
||||
| `port` | `number` | `27015` | Depends on game |
|
||||
| `timeout` | `number` | `10000` | Must be >= 2000 in form validation |
|
||||
| `guessPort` | `boolean` | `false` | Allows alternate query port attempts |
|
||||
| `requestRules` | `boolean` | `false` | Includes additional rules data |
|
||||
| `eval` | `string` (JS function) | built-in default | Optional |
|
||||
|
||||
## Supported Games {#supported-games}
|
||||
## Default eval behavior {#default-eval}
|
||||
|
||||
GameDig supports over 300 games. Here are some popular ones:
|
||||
If the query succeeds, the default eval returns:
|
||||
|
||||
### Popular Games {#popular-games}
|
||||
- status **UP**
|
||||
- latency = `responseTime`
|
||||
|
||||
| Game | ID | Default Port | Protocol |
|
||||
| :--------------------- | :--------------- | :----------- | :-------- |
|
||||
| Minecraft | `minecraft` | 25565 | Minecraft |
|
||||
| Minecraft Bedrock | `minecraftbe` | 19132 | Bedrock |
|
||||
| Counter-Strike 2 | `cs2` | 27015 | Valve |
|
||||
| Counter-Strike: GO | `csgo` | 27015 | Valve |
|
||||
| Valheim | `valheim` | 2457 | Valve |
|
||||
| ARK: Survival Evolved | `ase` | 27015 | Valve |
|
||||
| ARK: Survival Ascended | `asa` | 27015 | ASA |
|
||||
| Rust | `rust` | 28015 | Valve |
|
||||
| Team Fortress 2 | `tf2` | 27015 | Valve |
|
||||
| Garry's Mod | `garrysmod` | 27015 | Valve |
|
||||
| 7 Days to Die | `sdtd` | 26900 | Valve |
|
||||
| DayZ | `dayz` | 27016 | Valve |
|
||||
| Terraria (TShock) | `terrariatshock` | 7777 | Terraria |
|
||||
| V Rising | `vrising` | 27015 | Valve |
|
||||
| Enshrouded | `enshrouded` | 15637 | Valve |
|
||||
| Palworld | `palworld` | 8211 | Palworld |
|
||||
If the query fails or the eval function throws, the monitor returns **DOWN**.
|
||||
|
||||
### Finding Your Game {#finding-your-game}
|
||||
## Custom eval contract {#custom-eval-contract}
|
||||
|
||||
The full list of supported games is available in the Kener UI when configuring a GameDig monitor. You can search by game name to find the correct game ID.
|
||||
Function input:
|
||||
|
||||
For the complete list, see the [GameDig documentation](https://github.com/gamedig/node-gamedig#games-list).
|
||||
- `responseTime`
|
||||
- `responseRaw`
|
||||
|
||||
## Response Structure {#response-structure}
|
||||
|
||||
The GameDig query returns:
|
||||
|
||||
- `responseTime` (number): Server response time in milliseconds (ping).
|
||||
- `responseRaw` (object): Raw response data from the server.
|
||||
|
||||
The `responseRaw` object typically contains:
|
||||
Return:
|
||||
|
||||
```javascript
|
||||
{
|
||||
name: "My Awesome Server", // Server name
|
||||
map: "de_dust2", // Current map
|
||||
password: false, // Password protected
|
||||
numplayers: 12, // Current players
|
||||
maxplayers: 24, // Max players
|
||||
players: [ // Player list
|
||||
{ name: "Player1", score: 10 },
|
||||
{ name: "Player2", score: 5 }
|
||||
],
|
||||
bots: [], // Bot list
|
||||
connect: "192.168.1.1:27015", // Connection string
|
||||
ping: 45, // Server ping
|
||||
raw: { // Protocol-specific raw data
|
||||
// Varies by game
|
||||
}
|
||||
}
|
||||
{ status: "UP" | "DEGRADED" | "DOWN" | "MAINTENANCE", latency: number }
|
||||
```
|
||||
|
||||
## Custom Evaluation {#custom-evaluation}
|
||||
|
||||
The evaluation function allows you to define custom logic for determining monitor status.
|
||||
|
||||
### Function Signature {#function-signature}
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
// Your evaluation logic
|
||||
return {
|
||||
status: "UP" | "DOWN" | "DEGRADED",
|
||||
latency: number
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### Parameters {#parameters}
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| :------------- | :------- | :------------------------------------------- |
|
||||
| `responseTime` | `number` | Server response time (ping) in milliseconds. |
|
||||
| `responseRaw` | `object` | Raw response data from the game server. |
|
||||
|
||||
### Default Implementation {#default-implementation}
|
||||
|
||||
Here is the default logic used if you don't provide a custom function:
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
return {
|
||||
status: "UP",
|
||||
latency: responseTime
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
**Default Behavior:**
|
||||
|
||||
- **UP**: Server responded to query
|
||||
- **DOWN**: Server failed to respond (handled before eval runs)
|
||||
- **Latency**: Server ping time
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic Minecraft Server {#basic-minecraft-server}
|
||||
|
||||
Monitor a Minecraft Java server.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "minecraft-main",
|
||||
"name": "Minecraft Server",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "minecraft",
|
||||
"host": "mc.example.com",
|
||||
"port": 25565,
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Minecraft Bedrock Server {#minecraft-bedrock-server}
|
||||
|
||||
Monitor a Minecraft Bedrock/Pocket Edition server.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "minecraft-bedrock",
|
||||
"name": "Bedrock Server",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "minecraftbe",
|
||||
"host": "bedrock.example.com",
|
||||
"port": 19132,
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Counter-Strike 2 Server {#counter-strike-2-server}
|
||||
|
||||
Monitor a CS2 server.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "cs2-competitive",
|
||||
"name": "CS2 Competitive",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "cs2",
|
||||
"host": "cs2.example.com",
|
||||
"port": 27015,
|
||||
"timeout": 10000,
|
||||
"guessPort": true
|
||||
"guessPort": false,
|
||||
"requestRules": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Valheim Server {#valheim-server}
|
||||
|
||||
Monitor a Valheim dedicated server.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "valheim-server",
|
||||
"name": "Valheim Server",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "valheim",
|
||||
"host": "valheim.example.com",
|
||||
"port": 2457,
|
||||
"timeout": 15000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. ARK: Survival Evolved Server {#ark-survival-evolved-server}
|
||||
|
||||
Monitor an ARK server with rules request.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "ark-server",
|
||||
"name": "ARK Server",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "ase",
|
||||
"host": "ark.example.com",
|
||||
"port": 27015,
|
||||
"timeout": 15000,
|
||||
"guessPort": true,
|
||||
"requestRules": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. Rust Server {#rust-server}
|
||||
|
||||
Monitor a Rust dedicated server.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "rust-server",
|
||||
"name": "Rust Server",
|
||||
"type": "GAMEDIG",
|
||||
"type_data": {
|
||||
"gameId": "rust",
|
||||
"host": "rust.example.com",
|
||||
"port": 28015,
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Evaluation Examples {#advanced-evaluation-examples}
|
||||
|
||||
### 7. Player Count Threshold {#player-count-threshold}
|
||||
|
||||
Mark as DEGRADED if server is nearly full.
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
const playerRatio = responseRaw.numplayers / responseRaw.maxplayers
|
||||
|
||||
// Server nearly full
|
||||
if (playerRatio > 0.9) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
### 8. High Ping Detection {#high-ping-detection}
|
||||
|
||||
Mark as DEGRADED if server ping is too high.
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
const PING_THRESHOLD = 150 // ms
|
||||
|
||||
if (responseTime > PING_THRESHOLD) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
### 9. Empty Server Alert {#empty-server-alert}
|
||||
|
||||
Mark as DEGRADED if server has no players (might indicate issues).
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
// Server empty - might be having issues
|
||||
if (responseRaw.numplayers === 0) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
### 10. Map Verification {#map-verification}
|
||||
|
||||
Verify the server is running an expected map.
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
const expectedMaps = ["de_dust2", "de_mirage", "de_inferno"]
|
||||
|
||||
if (!expectedMaps.includes(responseRaw.map)) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
### 11. Password Protection Check {#password-protection-check}
|
||||
|
||||
Alert if server becomes password protected unexpectedly.
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
// Public server shouldn't have password
|
||||
if (responseRaw.password === true) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
### 12. Combined Checks {#combined-checks}
|
||||
|
||||
Multiple conditions for comprehensive monitoring.
|
||||
|
||||
```javascript
|
||||
;(async function (responseTime, responseRaw) {
|
||||
const issues = []
|
||||
|
||||
// High ping
|
||||
if (responseTime > 200) {
|
||||
issues.push("high_ping")
|
||||
}
|
||||
|
||||
// Server full
|
||||
if (responseRaw.numplayers >= responseRaw.maxplayers) {
|
||||
issues.push("full")
|
||||
}
|
||||
|
||||
// Wrong map
|
||||
if (responseRaw.map === "workshop") {
|
||||
issues.push("wrong_map")
|
||||
}
|
||||
|
||||
if (issues.length > 1) {
|
||||
return { status: "DOWN", latency: responseTime }
|
||||
} else if (issues.length === 1) {
|
||||
return { status: "DEGRADED", latency: responseTime }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: responseTime }
|
||||
})
|
||||
```
|
||||
|
||||
## Port Configuration {#port-configuration}
|
||||
|
||||
Game servers often have different ports for gameplay and queries:
|
||||
|
||||
| Game | Game Port | Query Port | Notes |
|
||||
| :----------- | :-------- | :--------- | :------------------------- |
|
||||
| Minecraft | 25565 | 25565 | Same port |
|
||||
| Source games | 27015 | 27015 | Same port (Valve protocol) |
|
||||
| ARK | 7777 | 27015 | Different ports |
|
||||
| Rust | 28015 | 28015 | Same port |
|
||||
| FiveM | 30120 | 30120 | Same port |
|
||||
|
||||
Use the **Guess Port** option if unsure about the query port.
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Timeout Configuration {#best-practices-timeout}
|
||||
|
||||
| Server Location | Recommended Timeout | Rationale |
|
||||
| :-------------- | :------------------ | :---------------------- |
|
||||
| Local network | 5000ms | Low latency expected |
|
||||
| Same region | 10000ms | Standard timeout |
|
||||
| Cross-region | 15000ms | Higher latency expected |
|
||||
| Unreliable host | 20000ms | Allow for packet loss |
|
||||
|
||||
### Query Port {#best-practices-query-port}
|
||||
|
||||
1. **Check game documentation**: Query port often differs from game port.
|
||||
2. **Use Guess Port**: Enable if you're unsure about the port.
|
||||
3. **Verify with tools**: Use GameDig CLI or online query tools to test.
|
||||
|
||||
### Evaluation Logic {#best-practices-evaluation}
|
||||
|
||||
1. **Keep it simple**: Basic UP/DOWN is often sufficient.
|
||||
2. **Use DEGRADED wisely**: For high ping, near-full servers, etc.
|
||||
3. **Don't over-complicate**: Focus on critical server health indicators.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :------------------ | :-------------------------------- | :------------------------------------ |
|
||||
| Always DOWN | Wrong game ID or port | Verify game ID and query port |
|
||||
| Timeout errors | Firewall blocking queries | Check firewall allows UDP/TCP queries |
|
||||
| Wrong data returned | Query port vs game port confusion | Enable "Guess Port" option |
|
||||
| Slow response | Server overloaded or far away | Increase timeout |
|
||||
| No player data | Server privacy settings | Check server query settings |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Test with GameDig CLI**:
|
||||
|
||||
```bash
|
||||
npx gamedig --type minecraft mc.example.com:25565
|
||||
```
|
||||
|
||||
2. **Check server settings**: Ensure queries are enabled on the game server.
|
||||
|
||||
3. **Verify firewall**: UDP queries need specific ports open.
|
||||
|
||||
4. **Try Guess Port**: Different games use different query ports.
|
||||
|
||||
### Valve Game Specifics {#valve-game-specifics}
|
||||
|
||||
For Valve games (CS2, TF2, Garry's Mod, etc.):
|
||||
|
||||
1. **Query port**: Usually same as game port (27015).
|
||||
2. **Request Rules**: Enable for additional server data (may increase latency).
|
||||
3. **Rate limiting**: Valve servers may rate-limit queries.
|
||||
- **Always DOWN**: wrong `gameId`/query port or blocked UDP/TCP query traffic
|
||||
- **High latency**: increase timeout and verify server region/network path
|
||||
- **Missing details**: enable `requestRules` for supported games
|
||||
|
||||
@@ -1,229 +1,68 @@
|
||||
---
|
||||
title: Group Monitor
|
||||
description: Aggregate multiple monitors into a single status view using weighted scoring
|
||||
description: Combine multiple monitors into a weighted aggregate status
|
||||
---
|
||||
|
||||
Group monitors allow you to combine multiple monitors into a unified status view. Instead of checking individual services, a group monitor aggregates the status of its member monitors using a weighted scoring system. This is ideal for representing complex systems where different components have varying levels of importance to the overall service health.
|
||||
Group monitors aggregate member monitor status and latency into one monitor.
|
||||
|
||||
## How Group Monitoring Works {#how-group-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's Group monitoring follows this workflow:
|
||||
Requirements from current validation:
|
||||
|
||||
1. **Collect Member Status**: Group monitor retrieves the latest status of each configured member monitor.
|
||||
2. **Calculate Weighted Score**: Each status (UP, DEGRADED, DOWN, MAINTENANCE) has a numeric score. The group calculates a weighted average based on member weights.
|
||||
3. **Map to Status**: The weighted score is mapped back to a group status (UP, DEGRADED, DOWN, or MAINTENANCE).
|
||||
4. **Aggregate Latency**: Member latencies are combined using the selected calculation method (AVG, MAX, or MIN).
|
||||
- at least **2** member monitors
|
||||
- members must be active, non-group monitors
|
||||
- weights must sum to approximately `1` (tolerance `0.01`)
|
||||
- `executionDelay` must be `>= 1000` ms
|
||||
- `latencyCalculation` in `AVG | MAX | MIN`
|
||||
|
||||
### Status Scoring System {#status-scoring-system}
|
||||
## Status scoring model {#status-scoring-model}
|
||||
|
||||
Each monitor status has a numeric score used in weighted calculations:
|
||||
Member statuses are scored as:
|
||||
|
||||
| Status | Score | Meaning |
|
||||
| :-------------- | :---- | :------------------------------- |
|
||||
| **UP** | `0` | Service operating normally |
|
||||
| **DEGRADED** | `1` | Service experiencing issues |
|
||||
| **DOWN** | `2` | Service unavailable |
|
||||
| **MAINTENANCE** | `3` | Service in scheduled maintenance |
|
||||
- `UP = 0`
|
||||
- `DEGRADED = 1`
|
||||
- `DOWN = 2`
|
||||
- `MAINTENANCE = 3`
|
||||
|
||||
### Weighted Score Calculation {#weighted-score-calculation}
|
||||
|
||||
The group status is determined by:
|
||||
Weighted score:
|
||||
|
||||
```
|
||||
Weighted Score = Σ(monitor_weight × monitor_status_score)
|
||||
|
||||
where weights must sum to 1.0
|
||||
Σ(member.weight * statusScore)
|
||||
```
|
||||
|
||||
The weighted score is then mapped back to a status:
|
||||
Mapped group status:
|
||||
|
||||
| Score Range | Group Status |
|
||||
| :---------------- | :-------------- |
|
||||
| `< 1.0` | **UP** |
|
||||
| `≥ 1.0 and < 2.0` | **DEGRADED** |
|
||||
| `≥ 2.0 and < 3.0` | **DOWN** |
|
||||
| `≥ 3.0` | **MAINTENANCE** |
|
||||
- `< 1` → **UP**
|
||||
- `>= 1` and `< 2` → **DEGRADED**
|
||||
- `>= 2` and `< 3` → **DOWN**
|
||||
- `>= 3` → **MAINTENANCE**
|
||||
|
||||
### Example Calculation {#example-calculation}
|
||||
## Latency aggregation {#latency-aggregation}
|
||||
|
||||
Given three monitors with weights:
|
||||
Group latency uses the selected mode:
|
||||
|
||||
- **API** (weight: 0.5): DOWN (score: 2)
|
||||
- **Database** (weight: 0.3): UP (score: 0)
|
||||
- **Cache** (weight: 0.2): UP (score: 0)
|
||||
- `AVG`: average member latency
|
||||
- `MAX`: slowest member latency
|
||||
- `MIN`: fastest member latency
|
||||
|
||||
```
|
||||
Weighted Score = (0.5 × 2) + (0.3 × 0) + (0.2 × 0) = 1.0
|
||||
Result: DEGRADED
|
||||
```
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
The group is marked **DEGRADED** because the high-weight API monitor is down, even though two other services are up.
|
||||
| Field | Type | Default | Notes |
|
||||
| :------------------- | :--------------------- | :------ | :-------------- |
|
||||
| `monitors` | `Array<{tag, weight}>` | `[]` | Required, min 2 |
|
||||
| `executionDelay` | `number` | `1000` | Must be >= 1000 |
|
||||
| `latencyCalculation` | `AVG\|MAX\|MIN` | `AVG` | |
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :---------------------- | :------- | :---------------------------------------------------------------------------------------------------------------- | :------ |
|
||||
| **Monitors** | `array` | List of member monitors with their tags and weights. Weights must sum to 1. | `[]` |
|
||||
| **Execution Delay** | `number` | Delay in milliseconds before the group monitor executes, allowing member monitors to complete their checks first. | `1000` |
|
||||
| **Latency Calculation** | `string` | How to calculate group latency from members: `AVG` (average), `MAX`, or `MIN`. | `AVG` |
|
||||
|
||||
### Execution Delay {#execution-delay}
|
||||
|
||||
The **executionDelay** parameter is critical for group monitors:
|
||||
|
||||
- **Purpose**: Delays the group monitor execution to ensure all member monitors have completed their checks for the current minute.
|
||||
- **Why It Matters**: Group monitors aggregate status from member monitors. If the group runs too early, it may read stale data from the previous minute, resulting in incorrect status calculations.
|
||||
- **Timing**: Member monitors run at the start of each minute (cron schedule). Setting executionDelay to 1000ms (1 second) or higher ensures member data is fresh.
|
||||
- **Recommendation**: Set executionDelay higher than the slowest member monitor's expected execution time. For example, if your API monitor has a 500ms timeout, use 1000ms or more for the group.
|
||||
|
||||
> [!WARNING]
|
||||
> Setting executionDelay too low (e.g., 100ms) can cause the group to aggregate stale data, making the group status lag behind actual member statuses.
|
||||
|
||||
### Monitor Weights {#monitor-weights}
|
||||
|
||||
Each member monitor in a group has a **weight** between `0` and `1`. The weights determine how much each monitor influences the overall group status.
|
||||
|
||||
- **Higher weight** = Greater impact on group status
|
||||
- **Lower weight** = Lesser impact on group status
|
||||
- **All weights must sum to 1.0**
|
||||
|
||||
> [!NOTE]
|
||||
> Groups cannot contain other group monitors. Only non-group, active monitors can be added to a group.
|
||||
|
||||
## Weight Assignment Strategies {#weight-assignment-strategies}
|
||||
|
||||
### Equal Weighting {#equal-weighting}
|
||||
|
||||
All monitors have equal importance:
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"monitors": [
|
||||
{ "tag": "api", "weight": 0.333 },
|
||||
{ "tag": "database", "weight": 0.333 },
|
||||
{ "tag": "cache", "weight": 0.334 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Each monitor contributes equally to the group status.
|
||||
|
||||
### Critical Component Weighting {#critical-component-weighting}
|
||||
|
||||
Primary service has higher weight:
|
||||
|
||||
```json
|
||||
{
|
||||
"monitors": [
|
||||
{ "tag": "api", "weight": 0.6 },
|
||||
{ "tag": "database", "weight": 0.25 },
|
||||
{ "tag": "cache", "weight": 0.15 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The API has more influence on overall status than supporting services.
|
||||
|
||||
### Tiered Importance {#tiered-importance}
|
||||
|
||||
```json
|
||||
{
|
||||
"monitors": [
|
||||
{ "tag": "core-api", "weight": 0.5 },
|
||||
{ "tag": "database", "weight": 0.3 },
|
||||
{ "tag": "cdn", "weight": 0.15 },
|
||||
{ "tag": "analytics", "weight": 0.05 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Core services have higher weights, while auxiliary services have minimal impact.
|
||||
|
||||
## Latency Calculation Methods {#latency-calculation-methods}
|
||||
|
||||
Group monitors can aggregate member latencies using different methods:
|
||||
|
||||
| Method | Description | Use Case |
|
||||
| :------ | :-------------------------------------- | :------------------------------ |
|
||||
| **AVG** | Average latency of all member monitors | General performance overview |
|
||||
| **MAX** | Highest latency among members (slowest) | Worst-case performance tracking |
|
||||
| **MIN** | Lowest latency among members (fastest) | Best-case performance tracking |
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic Web Application Stack {#basic-web-application-stack}
|
||||
|
||||
Monitor a typical web app with equal weights:
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "webapp-stack",
|
||||
"name": "Web Application",
|
||||
"type": "GROUP",
|
||||
"cron": "* * * * *",
|
||||
"type_data": {
|
||||
"monitors": [
|
||||
{ "tag": "frontend", "weight": 0.333 },
|
||||
{ "tag": "api-backend", "weight": 0.333 },
|
||||
{ "tag": "database", "weight": 0.334 }
|
||||
],
|
||||
"executionDelay": 1000,
|
||||
"latencyCalculation": "AVG"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
- All three components equally affect the group status
|
||||
- If any one component goes DOWN (score 2), weighted score is ~0.667 → Group remains **UP** but approaching **DEGRADED**
|
||||
- If two components go DOWN, weighted score is ~1.333 → Group becomes **DEGRADED**
|
||||
|
||||
### 2. Critical Service with Dependencies {#critical-service-with-dependencies}
|
||||
|
||||
Primary API is critical, supporting services are less impactful:
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "payment-service",
|
||||
"name": "Payment Processing",
|
||||
"type": "GROUP",
|
||||
"cron": "* * * * *",
|
||||
"type_data": {
|
||||
"monitors": [
|
||||
{ "tag": "payment-api", "weight": 0.7 },
|
||||
{ "tag": "fraud-check", "weight": 0.2 },
|
||||
{ "tag": "notification", "weight": 0.1 }
|
||||
],
|
||||
"executionDelay": 2000,
|
||||
"latencyCalculation": "MAX"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
- If payment-api goes DOWN (score 2): `0.7 × 2 = 1.4` → Group is **DEGRADED**
|
||||
- If only notification goes DOWN: `0.1 × 2 = 0.2` → Group remains **UP**
|
||||
- Uses MAX latency to track the slowest component
|
||||
|
||||
### 3. E-commerce Platform {#e-commerce-platform}
|
||||
|
||||
Multi-tier application with varying weights:
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "ecommerce-platform",
|
||||
"name": "E-Commerce Platform",
|
||||
"type": "GROUP",
|
||||
"cron": "*/2 * * * *",
|
||||
"type_data": {
|
||||
"monitors": [
|
||||
{ "tag": "storefront", "weight": 0.4 },
|
||||
{ "tag": "product-api", "weight": 0.25 },
|
||||
{ "tag": "checkout", "weight": 0.2 },
|
||||
{ "tag": "search", "weight": 0.1 },
|
||||
{ "tag": "recommendations", "weight": 0.05 }
|
||||
{ "tag": "api", "weight": 0.6 },
|
||||
{ "tag": "db", "weight": 0.3 },
|
||||
{ "tag": "cache", "weight": 0.1 }
|
||||
],
|
||||
"executionDelay": 1500,
|
||||
"latencyCalculation": "AVG"
|
||||
@@ -231,204 +70,8 @@ Multi-tier application with varying weights:
|
||||
}
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
- Storefront and core APIs have high weight
|
||||
- Search being down only contributes `0.1 × 2 = 0.2` to the score
|
||||
- Recommendations being down adds just `0.05 × 2 = 0.1` (minimal impact)
|
||||
|
||||
### 4. Gradual Degradation Example {#gradual-degradation-example}
|
||||
|
||||
Understanding how multiple failures affect group status:
|
||||
|
||||
With equal weights (0.25 each for 4 monitors):
|
||||
|
||||
| Failed Monitors | Calculation | Score | Status |
|
||||
| :-------------- | :--------------------------- | :----- | :------- |
|
||||
| None | `0` | `0.0` | UP |
|
||||
| 1 DOWN | `0.25 × 2 = 0.5` | `0.5` | UP |
|
||||
| 2 DOWN | `0.5 × 2 = 1.0` | `1.0` | DEGRADED |
|
||||
| 3 DOWN | `0.75 × 2 = 1.5` | `1.5` | DEGRADED |
|
||||
| 4 DOWN | `1.0 × 2 = 2.0` | `2.0` | DOWN |
|
||||
| 2 DEGRADED | `0.5 × 1 = 0.5` | `0.5` | UP |
|
||||
| 1 DOWN + 1 DEG | `0.25 × 2 + 0.25 × 1 = 0.75` | `0.75` | UP |
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Weight Assignment {#best-practices-weight-assignment}
|
||||
|
||||
1. **Identify Critical Components**: Services that can bring down the entire system should have higher weights (0.5-0.7).
|
||||
2. **Sum to 1.0**: Always ensure weights add up to exactly 1.0.
|
||||
3. **Use the "Distribute Equally" Button**: The UI provides a button to auto-calculate equal weights.
|
||||
4. **Round to 2-3 Decimals**: Avoid overly precise weights like `0.142857` — round to `0.143`.
|
||||
|
||||
### Group Organization {#best-practices-group-organization}
|
||||
|
||||
1. **Logical Grouping**: Group monitors that actually belong together (e.g., all microservices for one product).
|
||||
2. **Avoid Deep Nesting**: Groups cannot contain other groups — keep hierarchies flat.
|
||||
3. **Limit Group Size**: 3-7 monitors per group is ideal. Too many makes weights hard to reason about.
|
||||
4. **Name Clearly**: Use descriptive names like "Payment Stack" not "Group 1".
|
||||
|
||||
### Monitoring Strategy {#best-practices-monitoring-strategy}
|
||||
|
||||
1. **Check Frequency**: Group monitors should run at least as often as their slowest member.
|
||||
2. **Execution Delay**: Set executionDelay higher than your slowest member monitor's timeout + processing time. For example:
|
||||
- If API monitor has 500ms timeout: use 1000ms+ executionDelay
|
||||
- If SQL monitor has 2000ms timeout: use 2500ms+ executionDelay
|
||||
- Default 1000ms works for most monitors with timeouts under 500ms
|
||||
3. **Latency Method**:
|
||||
- Use **AVG** for general overview
|
||||
- Use **MAX** when you care about the slowest component
|
||||
- Use **MIN** rarely (only when fastest matters)
|
||||
|
||||
### Weight Scenarios {#best-practices-weight-scenarios}
|
||||
|
||||
**Scenario: Primary + Backup**
|
||||
|
||||
```json
|
||||
{ "primary": 0.8, "backup": 0.2 }
|
||||
```
|
||||
|
||||
Backup has low weight since it only matters when primary fails.
|
||||
|
||||
**Scenario: Load-Balanced Services**
|
||||
|
||||
```json
|
||||
{ "server1": 0.5, "server2": 0.5 }
|
||||
```
|
||||
|
||||
Both servers are equally important for availability.
|
||||
|
||||
**Scenario: Microservices with Shared Database**
|
||||
|
||||
```json
|
||||
{ "api1": 0.25, "api2": 0.25, "api3": 0.25, "database": 0.25 }
|
||||
```
|
||||
|
||||
All components are equally critical.
|
||||
|
||||
## Common Patterns {#common-patterns}
|
||||
|
||||
### Pattern 1: Frontend + Backend + Database {#pattern-frontend-backend-database}
|
||||
|
||||
```
|
||||
Frontend (0.3) ──► Backend (0.5) ──► Database (0.2)
|
||||
```
|
||||
|
||||
Backend has the highest weight as it's the core business logic. The database is weighted lower because issues often manifest in the backend first.
|
||||
|
||||
### Pattern 2: Microservices Architecture {#pattern-microservices}
|
||||
|
||||
```
|
||||
Gateway (0.4)
|
||||
├─► Service A (0.2)
|
||||
├─► Service B (0.2)
|
||||
└─► Service C (0.2)
|
||||
```
|
||||
|
||||
Gateway is critical as it's the entry point. Services have equal weight.
|
||||
|
||||
### Pattern 3: CDN + Origin {#pattern-cdn-origin}
|
||||
|
||||
```
|
||||
CDN (0.3) ──► Origin (0.7)
|
||||
```
|
||||
|
||||
Origin is more critical since CDN failures can be worked around, but origin failures are total.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Group Stays UP When Members Are DOWN {#troubleshooting-up-when-down}
|
||||
|
||||
**Problem**: Group shows UP even though some members are DOWN.
|
||||
|
||||
**Solution**: Check total weighted score. With low weights, a single DOWN monitor may not push the score to 1.0:
|
||||
|
||||
- `0.2 × 2 = 0.4` (still < 1.0, so UP)
|
||||
- Increase weights for critical monitors or adjust thresholds.
|
||||
|
||||
### Group Always Shows DEGRADED {#troubleshooting-always-degraded}
|
||||
|
||||
**Problem**: Group constantly shows DEGRADED status.
|
||||
|
||||
**Solution**:
|
||||
|
||||
- Check if member monitors frequently show DEGRADED status
|
||||
- Verify weight distribution — heavily weighted monitors have outsized impact
|
||||
- Review member monitor configurations for sensitivity
|
||||
|
||||
### Weights Don't Sum to 1.0 {#troubleshooting-weights-sum}
|
||||
|
||||
**Problem**: Cannot save group configuration.
|
||||
|
||||
**Solution**: Use the "Distribute Equally" button or manually adjust weights:
|
||||
|
||||
```
|
||||
Total: 0.99 → Add 0.01 to one monitor
|
||||
Total: 1.01 → Subtract 0.01 from one monitor
|
||||
```
|
||||
|
||||
### Missing Member Data {#troubleshooting-missing-data}
|
||||
|
||||
**Problem**: Group uses partial data or shows NO_DATA.
|
||||
|
||||
**Solution**:
|
||||
|
||||
- Ensure member monitors are running on schedule
|
||||
- Check that member monitors are active (not paused)
|
||||
- **Increase executionDelay** if member monitors take longer than expected to complete
|
||||
- Member monitors must complete before group aggregates
|
||||
- If a member has a 1000ms timeout, set group executionDelay to at least 1500ms
|
||||
|
||||
## Advanced Use Cases {#advanced-use-cases}
|
||||
|
||||
### Blue-Green Deployment Monitoring {#blue-green-deployment}
|
||||
|
||||
Monitor both environments with dynamic weighting:
|
||||
|
||||
**During Normal Operation:**
|
||||
|
||||
```json
|
||||
{ "blue": 1.0, "green": 0.0 }
|
||||
```
|
||||
|
||||
**During Deployment (50/50 traffic):**
|
||||
|
||||
```json
|
||||
{ "blue": 0.5, "green": 0.5 }
|
||||
```
|
||||
|
||||
**After Cutover:**
|
||||
|
||||
```json
|
||||
{ "blue": 0.0, "green": 1.0 }
|
||||
```
|
||||
|
||||
### Multi-Region Service {#multi-region-service}
|
||||
|
||||
Weight by traffic distribution:
|
||||
|
||||
```json
|
||||
{
|
||||
"us-east": 0.4,
|
||||
"us-west": 0.3,
|
||||
"eu-west": 0.2,
|
||||
"ap-south": 0.1
|
||||
}
|
||||
```
|
||||
|
||||
Reflects actual user impact if a region goes down.
|
||||
|
||||
### SLA-Based Weighting {#sla-based-weighting}
|
||||
|
||||
Weight by contractual importance:
|
||||
|
||||
```json
|
||||
{
|
||||
"enterprise-api": 0.6,
|
||||
"standard-api": 0.3,
|
||||
"free-api": 0.1
|
||||
}
|
||||
```
|
||||
|
||||
Prioritizes monitoring of revenue-generating tiers.
|
||||
- **Cannot save**: verify that weights sum to `1`, the minimum member count is met, and `executionDelay` is >= `1000`
|
||||
- **Unexpected stale status**: increase `executionDelay` so member checks finish first
|
||||
- **Group too optimistic/pessimistic**: rebalance weights toward critical components
|
||||
|
||||
@@ -1,377 +1,63 @@
|
||||
---
|
||||
title: Heartbeat Monitor
|
||||
description: Monitor cron jobs, scheduled tasks, and background processes with push-based heartbeat checks
|
||||
description: Push health signals from jobs, workers, and external systems
|
||||
---
|
||||
|
||||
Heartbeat monitors work differently from other monitor types - instead of Kener actively checking your service, your service sends periodic "heartbeat" signals to Kener. If Kener doesn't receive a heartbeat within the expected timeframe, it marks the service as degraded or down. This is ideal for monitoring cron jobs, scheduled tasks, batch processes, and background workers.
|
||||
Heartbeat monitors are push-based: your job calls a URL, and Kener measures how long it has been since the last signal.
|
||||
|
||||
## How Heartbeat Monitoring Works {#how-heartbeat-monitoring-works}
|
||||
## Heartbeat endpoint {#heartbeat-endpoint}
|
||||
|
||||
Kener's Heartbeat monitoring follows a push-based workflow:
|
||||
|
||||
1. **Kener Generates URL**: Each heartbeat monitor gets a unique URL.
|
||||
2. **Your Service Pings URL**: Your cron job or service sends HTTP requests to this URL.
|
||||
3. **Kener Records Heartbeat**: Each request records a timestamp.
|
||||
4. **Monitor Checks**: Kener's cron compares the last heartbeat time against the current time.
|
||||
5. **Status Determination**: Based on how late the heartbeat is, status is UP, DEGRADED, or DOWN.
|
||||
|
||||
### Heartbeat Check Process {#heartbeat-check-process}
|
||||
URL format:
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐
|
||||
│ Your Cron │ ──── GET/POST ────────► │ Kener │
|
||||
│ Job/Task │ /ext/heartbeat/tag:secret │ Server │
|
||||
└─────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ Record
|
||||
│ Timestamp
|
||||
▼ ▼
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ Kener's Cron Check (every minute): │
|
||||
│ │
|
||||
│ Last received heartbeat: 14:05:00 │
|
||||
│ Current time: 14:12:00 │
|
||||
│ │
|
||||
│ Time Since Last Heartbeat: 7 minutes │
|
||||
│ Degraded threshold: 5 minutes → DEGRADED │
|
||||
│ Down threshold: 10 minutes → Still DEGRADED │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
/ext/heartbeat/{tag}:{secret}
|
||||
```
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
Accepted methods: `GET` and `POST`.
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :----------------------------- | :------- | :--------------------------------------------------------------- | :------ |
|
||||
| **Degraded Remaining Minutes** | `number` | Mark as DEGRADED if no heartbeat received for this many minutes. | `5` |
|
||||
| **Down Remaining Minutes** | `number` | Mark as DOWN if no heartbeat received for this many minutes. | `10` |
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
> [!NOTE]
|
||||
> Heartbeat status is evaluated using **time since the last received heartbeat**. The monitor's `cron` value is not used for heartbeat timing.
|
||||
Set:
|
||||
|
||||
## Heartbeat URL {#heartbeat-url}
|
||||
- `degradedRemainingMinutes` (default `5`)
|
||||
- `downRemainingMinutes` (default `10`)
|
||||
|
||||
Each heartbeat monitor gets a unique URL with a secret token:
|
||||
`downRemainingMinutes` must be greater than `degradedRemainingMinutes`.
|
||||
|
||||
```
|
||||
https://your-kener-instance.com/ext/heartbeat/{monitor-tag}:{secret-token}
|
||||
```
|
||||
## Status logic {#status-logic}
|
||||
|
||||
- **Method**: `GET` or `POST` (both work)
|
||||
- **Authentication**: No authentication required (URL contains the unique secret)
|
||||
- **Response**: Returns a JSON success status
|
||||
If no heartbeat has ever been received:
|
||||
|
||||
## Status Evaluation Logic {#status-evaluation-logic}
|
||||
- status is **NO_DATA**
|
||||
|
||||
The heartbeat monitor uses time-based evaluation:
|
||||
Otherwise let `diff` = elapsed time since last heartbeat:
|
||||
|
||||
```javascript
|
||||
// Pseudocode for heartbeat status evaluation
|
||||
const lastHeartbeatMs = getLastHeartbeatTimestamp(monitor.tag) // timestamp in ms
|
||||
const nowMs = getCurrentTime() // timestamp in ms
|
||||
- `diff > downRemainingMinutes` → **DOWN**
|
||||
- `diff > degradedRemainingMinutes` → **DEGRADED**
|
||||
- otherwise → **UP**
|
||||
|
||||
// Calculate time since the last heartbeat
|
||||
const minutesSince = (nowMs - lastHeartbeatMs) / (60 * 1000)
|
||||
const latencyMs = nowMs - lastHeartbeatMs
|
||||
Latency is recorded as elapsed time since the last heartbeat (ms).
|
||||
|
||||
if (minutesSince > downRemainingMinutes) {
|
||||
return { status: "DOWN", latency: latencyMs }
|
||||
}
|
||||
|
||||
if (minutesSince > degradedRemainingMinutes) {
|
||||
return { status: "DEGRADED", latency: latencyMs }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: latencyMs }
|
||||
```
|
||||
|
||||
### Status Conditions {#status-conditions}
|
||||
|
||||
| Status | Condition | Meaning |
|
||||
| :----------- | :------------------------------------------------------ | :---------------------------------- |
|
||||
| **UP** | Heartbeat received on time or within degraded threshold | Service running normally |
|
||||
| **DEGRADED** | Late by more than degraded threshold but less than down | Service may be slow or struggling |
|
||||
| **DOWN** | Late by more than down threshold | Service likely failed |
|
||||
| **DOWN** | No heartbeat ever received | Service never started or configured |
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic Cron Job Monitoring {#basic-cron-job-monitoring}
|
||||
|
||||
Monitor a cron job that runs every 5 minutes.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "data-sync",
|
||||
"name": "Data Sync",
|
||||
"image": "/uploads/cron.png",
|
||||
"cron": "*/5 * * * *",
|
||||
"type": "HEARTBEAT",
|
||||
"type_data": {
|
||||
"degradedRemainingMinutes": 6,
|
||||
"degradedRemainingMinutes": 5,
|
||||
"downRemainingMinutes": 10
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Heartbeat URL**: `https://kener.example.com/ext/heartbeat/data-sync:SOME-SECRET-TOKEN`
|
||||
|
||||
**Add to your cron job**:
|
||||
Minimal cron usage pattern:
|
||||
|
||||
```bash
|
||||
*/5 * * * * /path/to/your/script.sh && curl -s https://kener.example.com/ext/heartbeat/data-sync:SOME-SECRET-TOKEN
|
||||
*/5 * * * * /path/to/job.sh && curl -s "https://your-kener-host/ext/heartbeat/my-job:my-secret"
|
||||
```
|
||||
|
||||
### 2. Hourly Backup Job {#hourly-backup-job}
|
||||
|
||||
Monitor a backup that runs every hour.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "hourly-backup",
|
||||
"name": "Hourly Backup",
|
||||
"image": "/uploads/backup.png",
|
||||
"cron": "* * * * *",
|
||||
"type": "HEARTBEAT",
|
||||
"type_data": {
|
||||
"degradedRemainingMinutes": 65,
|
||||
"downRemainingMinutes": 120
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Daily Report Generation {#daily-report-generation}
|
||||
|
||||
Monitor a daily report that runs at 2 AM.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "daily-report",
|
||||
"name": "Daily Report",
|
||||
"image": "/uploads/report.png",
|
||||
"cron": "* * * * *",
|
||||
"type": "HEARTBEAT",
|
||||
"type_data": {
|
||||
"degradedRemainingMinutes": 1500,
|
||||
"downRemainingMinutes": 3000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Background Worker Process {#background-worker-process}
|
||||
|
||||
Monitor a continuously running worker that should check in every minute.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "queue-worker",
|
||||
"name": "Queue Worker",
|
||||
"image": "/uploads/queue.png",
|
||||
"cron": "* * * * *",
|
||||
"type": "HEARTBEAT",
|
||||
"type_data": {
|
||||
"degradedRemainingMinutes": 2,
|
||||
"downRemainingMinutes": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Weekly Maintenance Task {#weekly-maintenance-task}
|
||||
|
||||
Monitor a weekly cleanup job that runs Sundays at 3 AM.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "weekly-cleanup",
|
||||
"name": "Weekly Cleanup",
|
||||
"image": "/uploads/cleanup.png",
|
||||
"cron": "* * * * *",
|
||||
"type": "HEARTBEAT",
|
||||
"type_data": {
|
||||
"degradedRemainingMinutes": 10100,
|
||||
"downRemainingMinutes": 20200
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Integration Examples {#integration-examples}
|
||||
|
||||
### Bash/Shell Script {#bash-integration}
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# your-script.sh
|
||||
|
||||
# Your actual task
|
||||
do_something_important
|
||||
|
||||
# Send heartbeat on success
|
||||
if [ $? -eq 0 ]; then
|
||||
curl -s https://kener.example.com/ext/heartbeat/your-monitor-tag:Secret-Token
|
||||
fi
|
||||
```
|
||||
|
||||
### Cron with Heartbeat {#cron-integration}
|
||||
|
||||
```cron
|
||||
# Run every 5 minutes, send heartbeat on success
|
||||
*/5 * * * * /path/to/script.sh && curl -s https://kener.example.com/ext/heartbeat/task-name:Secret-Token
|
||||
|
||||
# Run hourly, send heartbeat regardless of the script's exit status (for long-running tasks)
|
||||
0 * * * * /path/to/backup.sh; curl -s https://kener.example.com/ext/heartbeat/backup:Secret-Token
|
||||
```
|
||||
|
||||
### Python Script {#python-integration}
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
def main():
|
||||
# Your task logic
|
||||
# ...
|
||||
requests.get("https://kener.example.com/ext/heartbeat/python-task:Secret-Token")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
### Node.js Script {#nodejs-integration}
|
||||
|
||||
```javascript
|
||||
const https = require("https")
|
||||
|
||||
async function main() {
|
||||
// Your task logic
|
||||
// ...
|
||||
https.get("https://kener.example.com/ext/heartbeat/node-task:Secret-Token")
|
||||
}
|
||||
|
||||
main()
|
||||
```
|
||||
|
||||
### Docker Container {#docker-integration}
|
||||
|
||||
```dockerfile
|
||||
# In your Dockerfile or entrypoint
|
||||
CMD ["sh", "-c", "node app.js && curl -s https://kener.example.com/ext/heartbeat/container-task:Secret-Token"]
|
||||
```
|
||||
|
||||
### Kubernetes CronJob {#kubernetes-integration}
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: data-sync
|
||||
spec:
|
||||
schedule: "*/5 * * * *"
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: data-sync
|
||||
image: your-image
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
/app/sync.sh
|
||||
curl -s https://kener.example.com/ext/heartbeat/data-sync:Secret-Token
|
||||
restartPolicy: OnFailure
|
||||
```
|
||||
|
||||
### GitHub Actions {#github-actions-integration}
|
||||
|
||||
```yaml
|
||||
name: Scheduled Task
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 * * * *" # Every hour
|
||||
|
||||
jobs:
|
||||
task:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Run task
|
||||
run: |
|
||||
# Your task here
|
||||
echo "Running scheduled task"
|
||||
|
||||
- name: Send heartbeat
|
||||
run: curl -s https://kener.example.com/ext/heartbeat/github-action:Secret-Token
|
||||
```
|
||||
|
||||
## Threshold Configuration Guide {#threshold-configuration-guide}
|
||||
|
||||
Choose thresholds based on your job's schedule and criticality:
|
||||
|
||||
| Job Frequency | Degraded Threshold | Down Threshold | Rationale |
|
||||
| :-------------- | :----------------- | :------------- | :--------------------------------- |
|
||||
| Every minute | 2 minutes | 5 minutes | Quick detection for frequent jobs |
|
||||
| Every 5 minutes | 5 minutes | 15 minutes | Allow some variance |
|
||||
| Hourly | 15 minutes | 60 minutes | Jobs may take time to complete |
|
||||
| Daily | 60 minutes | 240 minutes | Long lead time for daily processes |
|
||||
| Weekly | 120 minutes | 480 minutes | Very long lead for weekly tasks |
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Heartbeat Placement {#best-practices-placement}
|
||||
|
||||
1. **Send at end of task**: Only send heartbeat after successful completion.
|
||||
2. **Handle errors**: Don't send heartbeat if task fails.
|
||||
3. **Use conditional execution**: `&& curl` only runs on success.
|
||||
|
||||
### Threshold Selection {#best-practices-thresholds}
|
||||
|
||||
1. **Consider task duration**: Allow time for the task itself to run.
|
||||
2. **Add buffer**: Account for system load and variability.
|
||||
3. **Match criticality**: Critical jobs need tighter thresholds.
|
||||
4. **Test with failures**: Verify alerts trigger appropriately.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :--------------------- | :------------------------------- | :----------------------------------- |
|
||||
| Always DOWN | Heartbeat URL incorrect | Verify URL and monitor tag |
|
||||
| No data | Cron job not running | Check cron daemon and job schedule |
|
||||
| Intermittent DEGRADED | Task taking longer than expected | Increase thresholds or optimize task |
|
||||
| Always DEGRADED | Thresholds too tight | Adjust degraded threshold |
|
||||
| Heartbeat not recorded | Network issues or firewall | Verify connectivity to Kener |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Test heartbeat URL manually**:
|
||||
|
||||
```bash
|
||||
curl -v https://kener.example.com/ext/heartbeat/your-tag:Secret-Token
|
||||
```
|
||||
|
||||
2. **Check cron logs**:
|
||||
|
||||
```bash
|
||||
grep CRON /var/log/syslog
|
||||
```
|
||||
|
||||
3. **Verify task execution**:
|
||||
|
||||
```bash
|
||||
# Add logging to your script
|
||||
echo "$(date): Task started" >> /var/log/mytask.log
|
||||
```
|
||||
|
||||
4. **Monitor network**: Ensure Kener is reachable from the task host.
|
||||
|
||||
## Use Cases {#use-cases}
|
||||
|
||||
| Use Case | Example |
|
||||
| :----------------------- | :---------------------------------------- |
|
||||
| **Database Backups** | Verify backup jobs complete successfully |
|
||||
| **Data Synchronization** | Monitor ETL pipelines and data imports |
|
||||
| **Report Generation** | Ensure scheduled reports are created |
|
||||
| **Cleanup Tasks** | Verify log rotation and temp file cleanup |
|
||||
| **Queue Workers** | Monitor background job processors |
|
||||
| **Health Checks** | Services that can't be externally polled |
|
||||
| **IoT Devices** | Devices that periodically check in |
|
||||
| **CI/CD Pipelines** | Monitor scheduled builds and deployments |
|
||||
- **Always NO_DATA**: endpoint never called or wrong `tag:secret`
|
||||
- **Always DOWN/DEGRADED**: thresholds too low for the actual job interval
|
||||
- **Signal accepted but stale**: ensure heartbeat is sent only after successful completion
|
||||
|
||||
@@ -1,62 +1,58 @@
|
||||
---
|
||||
title: Monitors Overview
|
||||
description: Overview of monitor types and their functionalities in Kener
|
||||
description: Choose the right monitor type and understand how monitor status is computed
|
||||
---
|
||||
|
||||
A monitor in Kener is a configured check that continuously verifies the availability and performance of your services. Monitors can be set up for various types of endpoints, including APIs, websites, servers, and more.
|
||||
Monitors are checks that run on your schedule and write a status + latency sample. Use this page to pick the right monitor type and understand status behavior.
|
||||
|
||||
Each monitor is identified by a tag, which helps in organizing and filtering monitors in the dashboard.
|
||||
## Status values {#status}
|
||||
|
||||
## Monitor Statuses {#status}
|
||||
Kener monitor results can be:
|
||||
|
||||
Kener categorizes monitor statuses into five states:
|
||||
- **UP**: Check succeeded
|
||||
- **DEGRADED**: Check succeeded but is unhealthy by your logic/threshold
|
||||
- **DOWN**: Check failed
|
||||
- **MAINTENANCE**: Overridden by an active maintenance window
|
||||
- **NO_DATA**: No signal yet (mainly used by heartbeat monitors before first ping)
|
||||
|
||||
- **<span class="text-green-600 dark:text-green-400">UP</span>**: The monitored service is operational and responding as expected.
|
||||
- **<span class="text-red-600 dark:text-red-400">DOWN</span>**: The monitored service is not responding or is returning errors.
|
||||
- **<span class="text-yellow-600 dark:text-yellow-400">DEGRADED</span>**: The monitored service is responding but with performance issues or partial failures.
|
||||
- **<span class="text-blue-600 dark:text-blue-400">MAINTENANCE</span>**: The monitor is temporarily disabled for scheduled maintenance.
|
||||
- **<span class="text-gray-600 dark:text-gray-400">NO DATA</span>**: The monitor has not received any data in the expected timeframe.
|
||||
## Status priority {#status-priority}
|
||||
|
||||
### Default Status {#default-status}
|
||||
|
||||
Default status is useful when you are not making an external call to check the status of a monitor. Instead, you can set a predefined status that the monitor will always return. This means that the monitor's real status will be whatever you set as the default status. This is the lowest priority status and will be overridden by any actual checks if they are configured.
|
||||
|
||||
### Status Priority {#status-priority}
|
||||
|
||||
When multiple checks or conditions are used to determine the status of a monitor, Kener follows a priority order to decide the final status:
|
||||
Final status is resolved by priority:
|
||||
|
||||
```
|
||||
MAINTENANCE > INCIDENT > REALTIME > DEFAULT
|
||||
```
|
||||
|
||||
This means that if a monitor is in maintenance mode, it will set the status to whatever is configured for the monitor in that maintenance window, regardless of other checks. If there is an active incident, that status will take precedence over realtime checks and default status. Realtime checks will override the default status if no incidents or maintenance are present.
|
||||
So realtime checks do not override an active maintenance or incident state.
|
||||
|
||||
## Scheduling {#scheduling}
|
||||
|
||||
Monitors can be scheduled to run at specific intervals, ranging from every minute to once a day. The frequency of checks can be configured based on the criticality of the service being monitored. Schedules are defined using `cron` expressions, which provide flexibility in defining complex schedules.
|
||||
Monitors run from cron expressions (for example `* * * * *` for every minute). Use tighter schedules for critical services and relaxed schedules for low-risk dependencies.
|
||||
|
||||
Sample cron expressions:
|
||||
## Uptime calculation {#uptime-calculation}
|
||||
|
||||
- Every minute: `* * * * *`
|
||||
- Every 5 minutes: `*/5 * * * *`
|
||||
- Every hour: `0 * * * *`
|
||||
- Daily at midnight: `0 0 * * *`
|
||||
|
||||
## Uptime Calculation {#uptime-calculation}
|
||||
|
||||
Uptime is calculated based on the number of successful checks versus the total number of checks performed over a specified period. The default formula used is:
|
||||
Default uptime formula:
|
||||
|
||||
```
|
||||
UP + MAINTENANCE
|
||||
------------------------------------ x 100
|
||||
UP + MAINTENANCE + DEGRADED + DOWN
|
||||
UP + MAINTENANCE
|
||||
-------------------------------- x 100
|
||||
UP + MAINTENANCE + DEGRADED + DOWN
|
||||
```
|
||||
|
||||
You can customize the uptime calculation formula in the settings to fit your monitoring needs.
|
||||
## Monitor types {#monitor-types}
|
||||
|
||||
## Monitor Types {#monitor-types}
|
||||
- [API Monitor](/docs/v4/monitors/api) — HTTP/HTTPS checks with custom eval logic
|
||||
- [Ping Monitor](/docs/v4/monitors/ping) — ICMP reachability/latency for hosts
|
||||
- [TCP Monitor](/docs/v4/monitors/tcp) — TCP port open/timeout/error checks
|
||||
- [DNS Monitor](/docs/v4/monitors/dns) — DNS record matching (`ANY`/`ALL`)
|
||||
- [SSL Monitor](/docs/v4/monitors/ssl) — TLS certificate expiry thresholds
|
||||
- [SQL Monitor](/docs/v4/monitors/sql) — Run SQL query against DB connection
|
||||
- [Heartbeat Monitor](/docs/v4/monitors/heartbeat) — Push-based health signal
|
||||
- [GameDig Monitor](/docs/v4/monitors/gamedig) — Game server query checks
|
||||
- [Group Monitor](/docs/v4/monitors/group) — Weighted aggregate of member monitors
|
||||
|
||||
Kener supports various types of monitors to cater to different monitoring needs:
|
||||
## Related docs {#related-docs}
|
||||
|
||||
- [API Monitors](/docs/monitors/api): Monitor HTTP/HTTPS endpoints for uptime and response validation.
|
||||
- [Ping Monitors](./ping.md): Use ICMP to check the availability of servers
|
||||
- [Monitors (general)](/docs/v4/monitors)
|
||||
- [Sharing Monitors](/docs/v4/sharing)
|
||||
- [Alert Configurations](/docs/v4/alerting/alert-configurations)
|
||||
|
||||
@@ -1,361 +1,61 @@
|
||||
---
|
||||
title: Ping Monitor
|
||||
description: Monitor server availability and network connectivity using ICMP ping checks
|
||||
description: Monitor host reachability and latency with ICMP checks
|
||||
---
|
||||
|
||||
Ping monitors use ICMP (Internet Control Message Protocol) to verify that servers or network devices are reachable and measure network latency. Kener's ping implementation supports multiple hosts, custom evaluation logic, and flexible timeout configurations.
|
||||
Ping monitors run ICMP checks against one or more hosts and evaluate the combined result.
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :-------------- | :--------- | :------------------------------------------------------------------------------ | :------------ |
|
||||
| **Hosts** | `array` | Array of host objects to ping. Each host can have independent timeout settings. | (Required) |
|
||||
| **Custom Eval** | `function` | A JavaScript function to evaluate ping results and determine monitor status. | Default logic |
|
||||
Add at least one host entry with:
|
||||
|
||||
### Host Object Properties {#host-object-properties}
|
||||
- `type`: `IP4` or `IP6`
|
||||
- `host`
|
||||
- `timeout` (ms)
|
||||
- `count` (number of ping packets)
|
||||
|
||||
Each host in the `hosts` array supports the following properties:
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
| Property | Type | Description | Default |
|
||||
| :---------- | :------- | :-------------------------------------------------- | :------- |
|
||||
| **type** | `string` | Ping method. Currently only `"cmd"` is supported. | `"cmd"` |
|
||||
| **host** | `string` | Hostname or IP address to ping (e.g., `"8.8.8.8"`). | Required |
|
||||
| **timeout** | `number` | Ping timeout in milliseconds. | `1000` |
|
||||
| **count** | `number` | Number of ping packets to send. | `3` |
|
||||
| Field | Type | Default | Notes |
|
||||
| :--------- | :--------------------------------- | :----------------- | :------- |
|
||||
| `hosts` | `Array<{type,host,timeout,count}>` | one empty host row | Required |
|
||||
| `pingEval` | `string` (JS function) | built-in default | Optional |
|
||||
|
||||
## How Ping Evaluation Works {#how-ping-evaluation-works}
|
||||
Default host values: `type=IP4`, `timeout=1000`, `count=3`.
|
||||
|
||||
The ping monitor executes the following workflow:
|
||||
## Default eval behavior {#default-eval}
|
||||
|
||||
1. **Execute Pings**: For each host in the `hosts` array, Kener sends ICMP ping packets.
|
||||
2. **Collect Results**: Each ping returns an object containing:
|
||||
- `host`: The target hostname/IP
|
||||
- `alive`: Boolean indicating if host responded
|
||||
- `time`: Average response time in milliseconds (if successful)
|
||||
- `min`, `max`, `avg`, `stddev`: Latency statistics
|
||||
3. **Evaluate Status**: The custom eval function receives the array of all ping results and determines the final monitor status.
|
||||
Default logic:
|
||||
|
||||
### Ping Result Structure {#ping-result-structure}
|
||||
- **UP** if all hosts are alive
|
||||
- **DOWN** otherwise
|
||||
- latency = average latency across hosts
|
||||
|
||||
Each ping operation returns an object with this structure:
|
||||
## Custom eval contract {#custom-eval-contract}
|
||||
|
||||
Function input:
|
||||
|
||||
- `arrayOfPings`
|
||||
|
||||
Return object:
|
||||
|
||||
```javascript
|
||||
{
|
||||
host: "8.8.8.8", // Target host
|
||||
alive: true, // Whether ping succeeded
|
||||
time: 15.234, // Average response time (ms)
|
||||
min: 14.123, // Minimum response time (ms)
|
||||
max: 16.789, // Maximum response time (ms)
|
||||
avg: 15.234, // Average response time (ms)
|
||||
stddev: 0.891 // Standard deviation (ms)
|
||||
}
|
||||
{ status: "UP" | "DEGRADED" | "DOWN" | "MAINTENANCE", latency: number }
|
||||
```
|
||||
|
||||
If the ping fails:
|
||||
|
||||
```javascript
|
||||
{
|
||||
host: "unreachable.example.com",
|
||||
alive: false,
|
||||
time: undefined,
|
||||
min: undefined,
|
||||
max: undefined,
|
||||
avg: undefined,
|
||||
stddev: undefined
|
||||
}
|
||||
```
|
||||
|
||||
## Custom Evaluation {#custom-evaluation}
|
||||
|
||||
The evaluation function allows you to define custom logic for determining monitor status based on ping results from multiple hosts.
|
||||
|
||||
**Function Signature:**
|
||||
|
||||
```javascript
|
||||
;(arrayOfPings) => {
|
||||
// Your evaluation logic
|
||||
return {
|
||||
status: "UP" | "DOWN" | "DEGRADED",
|
||||
latency: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `arrayOfPings` (`array`): Array of ping result objects, one for each host configured.
|
||||
|
||||
**Return Value:**
|
||||
The function can be **synchronous** or **asynchronous**. It **must** return (or resolve to) an object with:
|
||||
|
||||
- `status`: `'UP'`, `'DEGRADED'`, or `'DOWN'`.
|
||||
- `latency`: The latency to record in milliseconds (typically the average of all hosts).
|
||||
|
||||
### Default Implementation {#default-implementation}
|
||||
|
||||
Here is the default logic used if you don't provide a custom function:
|
||||
|
||||
```javascript
|
||||
;(arrayOfPings) => {
|
||||
let totalLatency = 0
|
||||
let aliveCount = 0
|
||||
|
||||
for (let i = 0; i < arrayOfPings.length; i++) {
|
||||
if (arrayOfPings[i].alive) {
|
||||
aliveCount++
|
||||
totalLatency += arrayOfPings[i].time
|
||||
}
|
||||
}
|
||||
|
||||
// All hosts must be alive for status to be UP
|
||||
if (aliveCount === arrayOfPings.length) {
|
||||
return {
|
||||
status: "UP",
|
||||
latency: totalLatency / aliveCount
|
||||
}
|
||||
}
|
||||
|
||||
// If some hosts are alive, status is DEGRADED
|
||||
if (aliveCount > 0) {
|
||||
return {
|
||||
status: "DEGRADED",
|
||||
latency: totalLatency / aliveCount
|
||||
}
|
||||
}
|
||||
|
||||
// No hosts responding
|
||||
return {
|
||||
status: "DOWN",
|
||||
latency: 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic Single Host Ping {#basic-single-host-ping}
|
||||
|
||||
Monitor a single server with default settings.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "web-server",
|
||||
"name": "Web Server",
|
||||
"type": "PING",
|
||||
"hosts": [
|
||||
{
|
||||
"type": "cmd",
|
||||
"host": "web.example.com",
|
||||
"timeout": 1000,
|
||||
"count": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Multiple Hosts with IP Addresses {#multiple-hosts-with-ip-addresses}
|
||||
|
||||
Monitor multiple servers (e.g., primary and backup) to track overall infrastructure health.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "dns-servers",
|
||||
"name": "DNS Servers",
|
||||
"type": "PING",
|
||||
"hosts": [
|
||||
{
|
||||
"type": "cmd",
|
||||
"host": "8.8.8.8",
|
||||
"timeout": 2000,
|
||||
"count": 5
|
||||
},
|
||||
{
|
||||
"type": "cmd",
|
||||
"host": "8.8.4.4",
|
||||
"timeout": 2000,
|
||||
"count": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Custom Evaluation - At Least One Host Up {#custom-evaluation-at-least-one-host-up}
|
||||
|
||||
Mark the monitor as UP if at least one host is responding (useful for redundant infrastructure).
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "load-balancers",
|
||||
"name": "Load Balancers",
|
||||
"type": "PING",
|
||||
"hosts": [
|
||||
{
|
||||
"type": "cmd",
|
||||
"host": "lb1.example.com",
|
||||
"timeout": 1000,
|
||||
"count": 3
|
||||
},
|
||||
{
|
||||
"type": "cmd",
|
||||
"host": "lb2.example.com",
|
||||
"timeout": 1000,
|
||||
"count": 3
|
||||
}
|
||||
],
|
||||
"pingEval": "(arrayOfPings) => { let aliveCount = 0; let totalLatency = 0; for (let i = 0; i < arrayOfPings.length; i++) { if (arrayOfPings[i].alive) { aliveCount++; totalLatency += arrayOfPings[i].time; } } if (aliveCount > 0) { return { status: 'UP', latency: totalLatency / aliveCount }; } return { status: 'DOWN', latency: 0 }; }"
|
||||
}
|
||||
```
|
||||
|
||||
**Formatted Custom Eval:**
|
||||
|
||||
```javascript
|
||||
;(arrayOfPings) => {
|
||||
let aliveCount = 0
|
||||
let totalLatency = 0
|
||||
|
||||
for (let i = 0; i < arrayOfPings.length; i++) {
|
||||
if (arrayOfPings[i].alive) {
|
||||
aliveCount++
|
||||
totalLatency += arrayOfPings[i].time
|
||||
}
|
||||
}
|
||||
|
||||
// UP if at least one host is alive
|
||||
if (aliveCount > 0) {
|
||||
return {
|
||||
status: "UP",
|
||||
latency: totalLatency / aliveCount
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
status: "DOWN",
|
||||
latency: 0
|
||||
"type_data": {
|
||||
"hosts": [{ "type": "IP4", "host": "8.8.8.8", "timeout": 1000, "count": 3 }]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Advanced - Latency Threshold with Degraded State {#advanced-latency-threshold-with-degraded-state}
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
Mark the monitor as DEGRADED if latency exceeds a threshold, even if all hosts are responding.
|
||||
|
||||
```javascript
|
||||
;(arrayOfPings) => {
|
||||
let totalLatency = 0
|
||||
let aliveCount = 0
|
||||
let maxLatency = 0
|
||||
|
||||
for (let i = 0; i < arrayOfPings.length; i++) {
|
||||
if (arrayOfPings[i].alive) {
|
||||
aliveCount++
|
||||
totalLatency += arrayOfPings[i].time
|
||||
maxLatency = Math.max(maxLatency, arrayOfPings[i].time)
|
||||
}
|
||||
}
|
||||
|
||||
// No hosts responding
|
||||
if (aliveCount === 0) {
|
||||
return {
|
||||
status: "DOWN",
|
||||
latency: 0
|
||||
}
|
||||
}
|
||||
|
||||
const avgLatency = totalLatency / aliveCount
|
||||
|
||||
// Check if all hosts are alive
|
||||
if (aliveCount === arrayOfPings.length) {
|
||||
// DEGRADED if average latency > 100ms or max latency > 200ms
|
||||
if (avgLatency > 100 || maxLatency > 200) {
|
||||
return {
|
||||
status: "DEGRADED",
|
||||
latency: avgLatency
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
status: "UP",
|
||||
latency: avgLatency
|
||||
}
|
||||
}
|
||||
|
||||
// Some hosts down = DEGRADED
|
||||
return {
|
||||
status: "DEGRADED",
|
||||
latency: avgLatency
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Geographic Redundancy Check {#geographic-redundancy-check}
|
||||
|
||||
Monitor servers across multiple regions and require at least 2 regions to be operational.
|
||||
|
||||
```javascript
|
||||
;(arrayOfPings) => {
|
||||
// Assuming first 2 hosts are US, next 2 are EU, last 2 are APAC
|
||||
const regions = [
|
||||
{ name: "US", hosts: [arrayOfPings[0], arrayOfPings[1]] },
|
||||
{ name: "EU", hosts: [arrayOfPings[2], arrayOfPings[3]] },
|
||||
{ name: "APAC", hosts: [arrayOfPings[4], arrayOfPings[5]] }
|
||||
]
|
||||
|
||||
let healthyRegions = 0
|
||||
let totalLatency = 0
|
||||
let totalHosts = 0
|
||||
|
||||
regions.forEach((region) => {
|
||||
const regionAlive = region.hosts.some((host) => host.alive)
|
||||
if (regionAlive) {
|
||||
healthyRegions++
|
||||
region.hosts.forEach((host) => {
|
||||
if (host.alive) {
|
||||
totalLatency += host.time
|
||||
totalHosts++
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
const avgLatency = totalHosts > 0 ? totalLatency / totalHosts : 0
|
||||
|
||||
// Need at least 2 regions healthy
|
||||
if (healthyRegions >= 2) {
|
||||
return { status: "UP", latency: avgLatency }
|
||||
}
|
||||
|
||||
// Only 1 region healthy
|
||||
if (healthyRegions === 1) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
}
|
||||
|
||||
// No regions healthy
|
||||
return { status: "DOWN", latency: 0 }
|
||||
}
|
||||
```
|
||||
|
||||
## Use Cases {#use-cases}
|
||||
|
||||
Ping monitors are ideal for:
|
||||
|
||||
- **Network Infrastructure** - Monitor routers, switches, and network devices
|
||||
- **Server Availability** - Basic reachability checks for web servers, databases, etc.
|
||||
- **Geographic Distribution** - Monitor services across multiple data centers
|
||||
- **Failover Systems** - Track primary and backup server availability
|
||||
- **ISP/Network Quality** - Monitor external DNS servers or gateways
|
||||
|
||||
## Limitations {#limitations}
|
||||
|
||||
- **ICMP-Only**: Ping only checks network reachability, not application health
|
||||
- **Firewall Blocks**: Some servers/firewalls block ICMP traffic
|
||||
- **Service Verification**: Cannot verify that specific services (HTTP, database, etc.) are running
|
||||
- **Packet Loss**: High packet loss can cause false negatives even when the host is reachable
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
1. **Combine with Application Monitors**: Use ping alongside API or TCP monitors for complete coverage
|
||||
2. **Set Appropriate Counts**: Use `count: 3-5` for a balance between speed and reliability
|
||||
3. **Timeout Configuration**: Set `timeout: 1000-2000ms` for most networks
|
||||
4. **Multiple Hosts for Redundancy**: Monitor backup/failover systems with custom evaluation logic
|
||||
5. **Geographic Monitoring**: Include hosts from different regions to detect regional outages
|
||||
6. **Latency Thresholds**: Use custom eval to set DEGRADED state for high-latency responses
|
||||
- **Validation fails**: host type must match detected address type
|
||||
- **Frequent DOWN**: some networks/firewalls block ICMP
|
||||
- **Noisy latency**: increase `count` or `timeout` to reduce false negatives
|
||||
|
||||
@@ -1,575 +1,53 @@
|
||||
---
|
||||
title: SQL Monitor
|
||||
description: Monitor database connectivity and health by executing queries against your SQL databases
|
||||
description: Monitor database connectivity by running SQL queries
|
||||
---
|
||||
|
||||
SQL monitors verify that your databases are accessible and responding to queries. They support multiple database engines and can execute custom queries to validate not just connectivity but also data integrity and performance.
|
||||
SQL monitors open a DB connection with Knex, execute your query, and mark status by success/failure.
|
||||
|
||||
## How SQL Monitoring Works {#how-sql-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's SQL monitoring follows this workflow:
|
||||
Set:
|
||||
|
||||
1. **Create Connection**: Kener uses Knex.js to establish a connection to your database using the provided connection string.
|
||||
2. **Execute Query**: The configured SQL query is executed against the database.
|
||||
3. **Measure Performance**: The total time from connection to query completion is recorded as latency.
|
||||
4. **Determine Status**: If the query executes successfully, the monitor is UP; any error results in DOWN.
|
||||
5. **Clean Up**: The database connection is properly closed to prevent resource leaks.
|
||||
- `dbType`
|
||||
- `connectionString`
|
||||
- `query` (default `SELECT 1`)
|
||||
- `timeout` (default `5000` ms)
|
||||
|
||||
### SQL Check Process {#sql-check-process}
|
||||
## Runtime behavior {#runtime-behavior}
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐
|
||||
│ Kener │ ──── Connection ───► │ Database │
|
||||
│ Monitor │ String │ Server │
|
||||
└─────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ ┌────────────────────────┐ │
|
||||
└────│ Execute SQL Query │──────┘
|
||||
│ e.g., SELECT 1 │
|
||||
└────────────────────────┘
|
||||
│
|
||||
┌─────────┴─────────┐
|
||||
│ │
|
||||
┌─────▼─────┐ ┌─────▼─────┐
|
||||
│ Success │ │ Error │
|
||||
│ (Result) │ │ (Timeout/ │
|
||||
└─────┬─────┘ │ Failure) │
|
||||
│ └─────┬─────┘
|
||||
▼ ▼
|
||||
┌───────────┐ ┌───────────┐
|
||||
│ Status:UP │ │Status:DOWN│
|
||||
│ Latency:ms│ │ Type:ERROR│
|
||||
└───────────┘ └───────────┘
|
||||
```
|
||||
- Successful query within timeout → **UP**
|
||||
- Query timeout → **DOWN** (`type: TIMEOUT`)
|
||||
- Connection/query error → **DOWN** (`type: ERROR`)
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :-------------------- | :------- | :--------------------------------------------------------- | :---------- |
|
||||
| **Database Type** | `string` | The database engine to connect to. | `pg` |
|
||||
| **Connection String** | `string` | Database connection URL or connection string. | (Required) |
|
||||
| **Query** | `string` | SQL query to execute for health check. | `SELECT 1` |
|
||||
| **Timeout** | `number` | Maximum time in milliseconds to wait for query completion. | `5000` (5s) |
|
||||
| Field | Type | Default | Notes |
|
||||
| :----------------- | :------------------------------------- | :--------- | :---------------------------- |
|
||||
| `dbType` | `pg\|mysql2\|mssql\|oracledb\|sqlite3` | `pg` | Runtime supports these values |
|
||||
| `connectionString` | `string` | — | Required |
|
||||
| `query` | `string` | `SELECT 1` | Required |
|
||||
| `timeout` | `number` | `5000` | Required |
|
||||
|
||||
### Supported Database Types {#supported-database-types}
|
||||
> [!IMPORTANT]
|
||||
> Current monitor form validation requires the connection string to start with `postgresql://` or `mysql://`.
|
||||
|
||||
| Database Type | Value | Connection String Format |
|
||||
| :------------ | :--------- | :---------------------------------------------------- |
|
||||
| PostgreSQL | `pg` | `postgresql://user:password@host:5432/database` |
|
||||
| MySQL | `mysql2` | `mysql://user:password@host:3306/database` |
|
||||
| SQL Server | `mssql` | `Server=host;Database=db;User Id=user;Password=pass;` |
|
||||
| Oracle | `oracledb` | `user/password@host:1521/service` |
|
||||
| SQLite | `sqlite3` | `/path/to/database.db` |
|
||||
|
||||
## Status Evaluation Logic {#status-evaluation-logic}
|
||||
|
||||
SQL monitors use a simple success/failure evaluation:
|
||||
|
||||
```javascript
|
||||
// Pseudocode for SQL status evaluation
|
||||
try {
|
||||
// Create connection with timeout
|
||||
const connection = await createConnection(connectionString, timeoutMs)
|
||||
|
||||
// Execute query with timeout race
|
||||
await Promise.race([connection.raw(query), timeout(timeoutMs)])
|
||||
|
||||
return { status: "UP", latency, type: "realtime" }
|
||||
} catch (error) {
|
||||
if (error.message === "Query timeout") {
|
||||
return { status: "DOWN", latency, type: "timeout" }
|
||||
}
|
||||
return { status: "DOWN", latency, type: "error" }
|
||||
}
|
||||
```
|
||||
|
||||
### Status Conditions {#status-conditions}
|
||||
|
||||
| Status | Type | Condition |
|
||||
| :------- | :------- | :------------------------------------------------------ |
|
||||
| **UP** | realtime | Query executed successfully within timeout |
|
||||
| **DOWN** | timeout | Query or connection exceeded timeout threshold |
|
||||
| **DOWN** | error | Connection failed, authentication error, or query error |
|
||||
|
||||
## Connection String Formats {#connection-string-formats}
|
||||
|
||||
### PostgreSQL {#postgresql-connection}
|
||||
|
||||
```
|
||||
postgresql://username:password@hostname:5432/database_name
|
||||
```
|
||||
|
||||
With options:
|
||||
|
||||
```
|
||||
postgresql://username:password@hostname:5432/database_name?sslmode=require
|
||||
```
|
||||
|
||||
### MySQL {#mysql-connection}
|
||||
|
||||
```
|
||||
mysql://username:password@hostname:3306/database_name
|
||||
```
|
||||
|
||||
With options:
|
||||
|
||||
```
|
||||
mysql://username:password@hostname:3306/database_name?ssl=true
|
||||
```
|
||||
|
||||
### SQL Server {#mssql-connection}
|
||||
|
||||
Standard format:
|
||||
|
||||
```
|
||||
Server=hostname;Database=database_name;User Id=username;Password=password;
|
||||
```
|
||||
|
||||
With encryption:
|
||||
|
||||
```
|
||||
Server=hostname;Database=database_name;User Id=username;Password=password;Encrypt=true;TrustServerCertificate=true;
|
||||
```
|
||||
|
||||
### Oracle {#oracle-connection}
|
||||
|
||||
Easy Connect:
|
||||
|
||||
```
|
||||
username/password@hostname:1521/service_name
|
||||
```
|
||||
|
||||
TNS format:
|
||||
|
||||
```
|
||||
username/password@(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=hostname)(PORT=1521))(CONNECT_DATA=(SERVICE_NAME=service_name)))
|
||||
```
|
||||
|
||||
### SQLite {#sqlite-connection}
|
||||
|
||||
```
|
||||
/absolute/path/to/database.db
|
||||
```
|
||||
|
||||
Or relative:
|
||||
|
||||
```
|
||||
./data/database.db
|
||||
```
|
||||
|
||||
## Using Environment Variables {#using-environment-variables}
|
||||
|
||||
Kener supports environment variable substitution in connection strings for secure credential management:
|
||||
|
||||
```
|
||||
postgresql://$DB_USER:$DB_PASSWORD@$DB_HOST:5432/$DB_NAME
|
||||
```
|
||||
|
||||
Environment variables are replaced at runtime:
|
||||
|
||||
- `$DB_USER` → value of `process.env.DB_USER`
|
||||
- `$DB_PASSWORD` → value of `process.env.DB_PASSWORD`
|
||||
|
||||
This keeps sensitive credentials out of your configuration.
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic PostgreSQL Health Check {#basic-postgresql-health-check}
|
||||
|
||||
Simple connectivity check for PostgreSQL.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-main",
|
||||
"name": "PostgreSQL Primary",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_MONITOR_PASS@db.example.com:5432/myapp",
|
||||
"connectionString": "postgresql://monitor:$DB_PASSWORD@db.example.com:5432/app",
|
||||
"query": "SELECT 1",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. MySQL Database Monitor {#mysql-database-monitor}
|
||||
|
||||
Monitor a MySQL database with SSL.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mysql-prod",
|
||||
"name": "MySQL Production",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "mysql2",
|
||||
"connectionString": "mysql://monitor:$MYSQL_PASS@mysql.example.com:3306/production?ssl=true",
|
||||
"query": "SELECT 1",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. SQL Server Health Check {#sql-server-health-check}
|
||||
|
||||
Monitor Microsoft SQL Server.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mssql-main",
|
||||
"name": "SQL Server Main",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "mssql",
|
||||
"connectionString": "Server=sqlserver.example.com;Database=AppDB;User Id=monitor;Password=$MSSQL_PASS;Encrypt=true;",
|
||||
"query": "SELECT 1",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Oracle Database Monitor {#oracle-database-monitor}
|
||||
|
||||
Monitor an Oracle database instance.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "oracle-prod",
|
||||
"name": "Oracle Production",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "oracledb",
|
||||
"connectionString": "monitor/$ORACLE_PASS@oracle.example.com:1521/PROD",
|
||||
"query": "SELECT 1 FROM DUAL",
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. SQLite Local Database {#sqlite-local-database}
|
||||
|
||||
Monitor a local SQLite database.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "sqlite-app",
|
||||
"name": "App SQLite Database",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "sqlite3",
|
||||
"connectionString": "/var/lib/myapp/data.db",
|
||||
"query": "SELECT 1",
|
||||
"timeout": 2000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. PostgreSQL Read Replica {#postgresql-read-replica}
|
||||
|
||||
Monitor a read replica using a read-only query.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-replica",
|
||||
"name": "PostgreSQL Read Replica",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@replica.example.com:5432/myapp",
|
||||
"query": "SELECT COUNT(*) FROM pg_stat_activity",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Query Examples {#advanced-query-examples}
|
||||
|
||||
### 7. Check Table Existence {#check-table-existence}
|
||||
|
||||
Verify a critical table exists.
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-table-check",
|
||||
"name": "Users Table Check",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@db.example.com:5432/myapp",
|
||||
"query": "SELECT 1 FROM information_schema.tables WHERE table_name = 'users'",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**MySQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mysql-table-check",
|
||||
"name": "Orders Table Check",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "mysql2",
|
||||
"connectionString": "mysql://monitor:$DB_PASS@mysql.example.com:3306/shop",
|
||||
"query": "SELECT 1 FROM information_schema.tables WHERE table_schema = 'shop' AND table_name = 'orders'",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. Check Replication Status {#check-replication-status}
|
||||
|
||||
**PostgreSQL - Check if replica is caught up:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-repl-check",
|
||||
"name": "Replication Health",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@primary.example.com:5432/myapp",
|
||||
"query": "SELECT 1 FROM pg_stat_replication WHERE state = 'streaming'",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**MySQL - Check slave status:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mysql-repl-check",
|
||||
"name": "MySQL Replication",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "mysql2",
|
||||
"connectionString": "mysql://monitor:$DB_PASS@replica.example.com:3306/myapp",
|
||||
"query": "SELECT 1 FROM performance_schema.replication_connection_status WHERE SERVICE_STATE = 'ON'",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 9. Check Connection Count {#check-connection-count}
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-conn-check",
|
||||
"name": "Connection Pool Health",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@db.example.com:5432/myapp",
|
||||
"query": "SELECT 1 WHERE (SELECT count(*) FROM pg_stat_activity) < 100",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 10. Check Recent Data {#check-recent-data}
|
||||
|
||||
Verify data is being written (useful for ETL pipelines).
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "data-freshness",
|
||||
"name": "Data Pipeline Health",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@db.example.com:5432/analytics",
|
||||
"query": "SELECT 1 FROM events WHERE created_at > NOW() - INTERVAL '1 hour' LIMIT 1",
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 11. Check Database Size {#check-database-size}
|
||||
|
||||
Verify the database has not grown past a size limit (100 GB in this example).
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "db-size-check",
|
||||
"name": "Database Size Check",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@db.example.com:5432/myapp",
|
||||
"query": "SELECT 1 WHERE pg_database_size(current_database()) < 100000000000",
|
||||
"timeout": 10000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 12. Check Long-Running Queries {#check-long-running-queries}
|
||||
|
||||
Alert if queries are running too long.
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "long-query-check",
|
||||
"name": "Long Query Monitor",
|
||||
"type": "SQL",
|
||||
"type_data": {
|
||||
"dbType": "pg",
|
||||
"connectionString": "postgresql://monitor:$DB_PASS@db.example.com:5432/myapp",
|
||||
"query": "SELECT 1 WHERE NOT EXISTS (SELECT 1 FROM pg_stat_activity WHERE state = 'active' AND query_start < NOW() - INTERVAL '5 minutes' AND query NOT LIKE '%pg_stat_activity%')",
|
||||
"timeout": 5000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Common Health Check Queries {#common-health-check-queries}
|
||||
|
||||
### Simple Connectivity {#queries-connectivity}
|
||||
|
||||
| Database | Query | Notes |
|
||||
| :--------- | :------------------- | :------------------ |
|
||||
| PostgreSQL | `SELECT 1` | Simplest check |
|
||||
| MySQL | `SELECT 1` | Simplest check |
|
||||
| SQL Server | `SELECT 1` | Simplest check |
|
||||
| Oracle | `SELECT 1 FROM DUAL` | DUAL table required |
|
||||
| SQLite | `SELECT 1` | Simplest check |
|
||||
|
||||
### Version Check {#queries-version}
|
||||
|
||||
| Database | Query | Returns |
|
||||
| :--------- | :------------------------ | :------------------ |
|
||||
| PostgreSQL | `SELECT version()` | Full version string |
|
||||
| MySQL | `SELECT VERSION()` | Version number |
|
||||
| SQL Server | `SELECT @@VERSION` | Full version info |
|
||||
| Oracle | `SELECT * FROM V$VERSION` | Component versions |
|
||||
| SQLite | `SELECT sqlite_version()` | SQLite version |
|
||||
|
||||
### Database Status {#queries-status}
|
||||
|
||||
| Database | Query | Purpose |
|
||||
| :--------- | :---------------------------------------------- | :--------------- |
|
||||
| PostgreSQL | `SELECT pg_is_in_recovery()` | Check if replica |
|
||||
| MySQL | `SHOW STATUS LIKE 'Uptime'` | Server uptime |
|
||||
| SQL Server | `SELECT DATABASEPROPERTYEX('dbname', 'Status')` | Database state |
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Connection String Security {#best-practices-security}
|
||||
|
||||
1. **Use environment variables**: Never hardcode passwords in configuration.
|
||||
2. **Create monitor users**: Use dedicated read-only accounts for monitoring.
|
||||
3. **Limit permissions**: Monitor accounts should have only the SELECT privilege.
|
||||
4. **Use SSL/TLS**: Enable encryption for database connections.
|
||||
|
||||
### Query Design {#best-practices-queries}
|
||||
|
||||
1. **Keep queries simple**: `SELECT 1` is sufficient for basic connectivity.
|
||||
2. **Avoid heavy queries**: Health-check queries should not degrade production performance.
|
||||
3. **Use timeouts**: Always set appropriate timeouts.
|
||||
4. **Test queries first**: Verify queries work before configuring monitors.
|
||||
|
||||
### Timeout Configuration {#best-practices-timeout}
|
||||
|
||||
| Scenario | Recommended Timeout | Rationale |
|
||||
| :-------------- | :------------------ | :----------------------- |
|
||||
| Local database | 2000-3000ms | Fast local connections |
|
||||
| Same datacenter | 5000ms | Standard network latency |
|
||||
| Cross-region | 10000ms | Higher latency expected |
|
||||
| Complex queries | 15000-30000ms | Query execution time |
|
||||
|
||||
### Monitor User Permissions {#best-practices-permissions}
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```sql
|
||||
CREATE USER kener_monitor WITH PASSWORD 'secure_password';
|
||||
GRANT CONNECT ON DATABASE myapp TO kener_monitor;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA public TO kener_monitor;
|
||||
```
|
||||
|
||||
**MySQL:**
|
||||
|
||||
```sql
|
||||
CREATE USER 'kener_monitor'@'%' IDENTIFIED BY 'secure_password';
|
||||
GRANT SELECT ON myapp.* TO 'kener_monitor'@'%';
|
||||
FLUSH PRIVILEGES;
|
||||
```
|
||||
|
||||
**SQL Server:**
|
||||
|
||||
```sql
|
||||
CREATE LOGIN kener_monitor WITH PASSWORD = 'secure_password';
|
||||
CREATE USER kener_monitor FOR LOGIN kener_monitor;
|
||||
GRANT SELECT TO kener_monitor;
|
||||
```
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :-------------------- | :------------------------------------- | :-------------------------------------- |
|
||||
| Connection refused | Wrong host/port or firewall | Verify network connectivity |
|
||||
| Authentication failed | Wrong credentials | Check username/password |
|
||||
| Timeout | Slow network or overloaded database | Increase timeout or optimize database |
|
||||
| SSL required | Database requires encrypted connection | Add SSL parameters to connection string |
|
||||
| Database not found | Wrong database name | Verify database exists |
|
||||
| Permission denied | Monitor user lacks privileges | Grant required permissions |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Test connection manually**:
|
||||
|
||||
```bash
|
||||
# PostgreSQL
|
||||
psql "postgresql://user:pass@host:5432/db"
|
||||
|
||||
# MySQL
|
||||
mysql -h host -u user -p database
|
||||
|
||||
# SQL Server
|
||||
sqlcmd -S host -U user -P password -d database
|
||||
```
|
||||
|
||||
2. **Check database logs**: Look for connection attempts and errors.
|
||||
|
||||
3. **Verify network path**: Ensure Kener can reach the database server.
|
||||
|
||||
4. **Test from Kener host**: Run queries from the same machine running Kener.
|
||||
|
||||
### Latency Interpretation {#latency-interpretation}
|
||||
|
||||
SQL monitor latency includes:
|
||||
|
||||
- DNS resolution
|
||||
- TCP connection establishment
|
||||
- TLS handshake (if SSL enabled)
|
||||
- Authentication
|
||||
- Query execution
|
||||
- Result retrieval
|
||||
|
||||
Typical latencies for `SELECT 1`:
|
||||
|
||||
| Scenario | Expected Latency |
|
||||
| :-------------- | :--------------- |
|
||||
| Local database | 1-10ms |
|
||||
| Same datacenter | 5-30ms |
|
||||
| Cross-region | 50-150ms |
|
||||
| Cross-continent | 100-300ms |
|
||||
|
||||
High latency may indicate:
|
||||
|
||||
- Network congestion
|
||||
- Database overload
|
||||
- Connection pool exhaustion
|
||||
- Slow authentication (LDAP, etc.)
|
||||
- **Timeout**: increase timeout or optimize query/network path
|
||||
- **Auth/connection errors**: verify driver type + connection string + credentials
|
||||
- **Permission errors**: use a monitor user with minimum required read access
|
||||
|
||||
@@ -1,118 +1,44 @@
|
||||
---
|
||||
title: SSL Monitor
|
||||
description: Monitor SSL/TLS certificate expiration and ensure your services remain secure
|
||||
description: Monitor TLS certificate expiry with degraded/down thresholds
|
||||
---
|
||||
|
||||
SSL monitors check the validity and expiration of SSL/TLS certificates on your servers. They help you proactively detect expiring certificates before they cause service disruptions or security warnings for your users.
|
||||
SSL monitors open a TLS connection, read the peer certificate expiry date, and map remaining time to monitor status.
|
||||
|
||||
## How SSL Monitoring Works {#how-ssl-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's SSL monitoring follows this workflow:
|
||||
Set:
|
||||
|
||||
1. **TLS Connection**: Kener establishes a TLS connection to the specified host and port.
|
||||
2. **Certificate Retrieval**: The server's SSL certificate is retrieved using the `getPeerCertificate()` method.
|
||||
3. **Expiry Calculation**: The certificate's `valid_to` date is compared against the current time.
|
||||
4. **Status Determination**: Based on configurable thresholds, the monitor status is set to UP, DEGRADED, or DOWN.
|
||||
5. **Latency Recording**: The time taken to establish the TLS connection is recorded as latency.
|
||||
- `host`
|
||||
- `port` (default `443`)
|
||||
- `degradedRemainingHours` (default `168`)
|
||||
- `downRemainingHours` (default `24`)
|
||||
|
||||
### SSL Check Process {#ssl-check-process}
|
||||
`degradedRemainingHours` must be greater than `downRemainingHours`.
|
||||
|
||||
```
|
||||
┌─────────────┐ TLS Handshake ┌─────────────┐
|
||||
│ Kener │ ───────────────────► │ Server │
|
||||
│ Monitor │ (Port 443) │ (HTTPS) │
|
||||
└─────────────┘ └─────────────┘
|
||||
│ │
|
||||
│ ┌────────────────────────┐ │
|
||||
└────│ Get Peer Certificate │◄─────┘
|
||||
└────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Certificate Info: │
|
||||
│ - valid_to: 2026-06-15 │
|
||||
│ - Time remaining: 131d │
|
||||
└────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Compare with thresholds│
|
||||
│ DEGRADED: 168h (7d) │
|
||||
│ DOWN: 24h (1d) │
|
||||
└────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌────────────────────────┐
|
||||
│ Status: UP │
|
||||
│ (131 days > 7 days) │
|
||||
└────────────────────────┘
|
||||
```
|
||||
## Status logic {#status-logic}
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
Let `hours` = hours until certificate expiry:
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :--------------------------- | :------- | :------------------------------------------------------------ | :--------- |
|
||||
| **Host** | `string` | The domain name or IP address to check (e.g., `example.com`). | (Required) |
|
||||
| **Port** | `string` | The port to connect to for TLS. | `443` |
|
||||
| **Degraded Remaining Hours** | `number` | Hours until expiry to mark as DEGRADED. | `168` (7d) |
|
||||
| **Down Remaining Hours** | `number` | Hours until expiry to mark as DOWN. | `24` (1d) |
|
||||
- `hours > degradedRemainingHours` → **UP**
|
||||
- `downRemainingHours < hours <= degradedRemainingHours` → **DEGRADED**
|
||||
- `hours <= downRemainingHours` → **DOWN**
|
||||
|
||||
## Status Evaluation Logic {#status-evaluation-logic}
|
||||
Connection/certificate retrieval errors also return **DOWN**.
|
||||
|
||||
The SSL monitor uses a threshold-based evaluation to determine status:
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
```javascript
|
||||
// Pseudocode for SSL status evaluation
|
||||
const hoursUntilExpiry = (certificate.validTo - now) / (1000 * 60 * 60)
|
||||
| Field | Type | Default | Notes |
|
||||
| :----------------------- | :------- | :------ | :----------------------------- |
|
||||
| `host` | `string` | — | Required |
|
||||
| `port` | `string` | `443` | Numeric string accepted |
|
||||
| `degradedRemainingHours` | `number` | `168` | Must be > `downRemainingHours` |
|
||||
| `downRemainingHours` | `number` | `24` | |
|
||||
|
||||
if (hoursUntilExpiry > degradedRemainingHours) {
|
||||
return { status: "UP", latency }
|
||||
} else if (hoursUntilExpiry > downRemainingHours) {
|
||||
return { status: "DEGRADED", latency }
|
||||
} else {
|
||||
return { status: "DOWN", latency }
|
||||
}
|
||||
```
|
||||
|
||||
### Status Conditions {#status-conditions}
|
||||
|
||||
| Status | Condition | Meaning |
|
||||
| :----------- | :---------------------------------------------------- | :------------------------------ |
|
||||
| **UP** | Hours remaining > Degraded threshold | Certificate is healthy |
|
||||
| **DEGRADED** | Down threshold < Hours remaining ≤ Degraded threshold | Certificate expiring soon |
|
||||
| **DOWN** | Hours remaining ≤ Down threshold OR connection failed | Certificate critical or expired |
|
||||
|
||||
### Error Handling {#error-handling}
|
||||
|
||||
The monitor returns DOWN status when:
|
||||
|
||||
- **Connection refused**: Server not accepting TLS connections
|
||||
- **No certificate**: Server doesn't present a valid certificate
|
||||
- **Invalid certificate**: Certificate cannot be parsed
|
||||
- **Network timeout**: Connection takes too long
|
||||
- **DNS failure**: Hostname cannot be resolved
|
||||
|
||||
## Threshold Configuration Guide {#threshold-configuration-guide}
|
||||
|
||||
Choose thresholds based on your certificate renewal process:
|
||||
|
||||
| Renewal Process | Degraded Threshold | Down Threshold | Rationale |
|
||||
| :---------------------- | :----------------- | :------------- | :-------------------------------- |
|
||||
| **Auto-renewal (ACME)** | 168h (7 days) | 24h (1 day) | Let's Encrypt clients renew ~30 days before expiry |
|
||||
| **Manual renewal** | 720h (30 days) | 168h (7 days) | More time to coordinate renewal |
|
||||
| **Enterprise CA** | 2160h (90 days) | 720h (30 days) | Long lead time for procurement |
|
||||
| **Critical services** | 336h (14 days) | 72h (3 days) | Extra buffer for critical systems |
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic HTTPS Website {#basic-https-website}
|
||||
|
||||
Monitor a standard website's SSL certificate.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "website-ssl",
|
||||
"name": "Website SSL Certificate",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "example.com",
|
||||
@@ -123,217 +49,8 @@ Monitor a standard website's SSL certificate.
|
||||
}
|
||||
```
|
||||
|
||||
### 2. API Endpoint Certificate {#api-endpoint-certificate}
|
||||
|
||||
Monitor an API server with stricter thresholds.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "api-ssl",
|
||||
"name": "API SSL Certificate",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "api.example.com",
|
||||
"port": "443",
|
||||
"degradedRemainingHours": 336,
|
||||
"downRemainingHours": 72
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Mail Server (SMTPS) {#mail-server-smtps}
|
||||
|
||||
Monitor SMTP server with TLS on port 465.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "mail-ssl",
|
||||
"name": "Mail Server SSL",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "mail.example.com",
|
||||
"port": "465",
|
||||
"degradedRemainingHours": 168,
|
||||
"downRemainingHours": 24
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. IMAP Server (IMAPS) {#imap-server-imaps}
|
||||
|
||||
Monitor IMAP server SSL certificate.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "imap-ssl",
|
||||
"name": "IMAP Server SSL",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "imap.example.com",
|
||||
"port": "993",
|
||||
"degradedRemainingHours": 168,
|
||||
"downRemainingHours": 24
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Database Server (PostgreSQL SSL) {#database-ssl}
|
||||
|
||||
Monitor PostgreSQL server with SSL enabled.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-ssl",
|
||||
"name": "PostgreSQL SSL Certificate",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "db.example.com",
|
||||
"port": "5432",
|
||||
"degradedRemainingHours": 720,
|
||||
"downRemainingHours": 168
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. Custom Port Service {#custom-port-service}
|
||||
|
||||
Monitor a custom application with SSL on a non-standard port.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "custom-app-ssl",
|
||||
"name": "Custom App SSL",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "app.example.com",
|
||||
"port": "8443",
|
||||
"degradedRemainingHours": 168,
|
||||
"downRemainingHours": 24
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7. Enterprise CA with Long Lead Time {#enterprise-ca}
|
||||
|
||||
Monitor certificates from enterprise CAs requiring procurement.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "enterprise-ssl",
|
||||
"name": "Enterprise Service SSL",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "internal.company.com",
|
||||
"port": "443",
|
||||
"degradedRemainingHours": 2160,
|
||||
"downRemainingHours": 720
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. Let's Encrypt Auto-Renewal {#lets-encrypt-auto-renewal}
|
||||
|
||||
Monitor with thresholds aligned to Let's Encrypt renewal timing.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "letsencrypt-ssl",
|
||||
"name": "Auto-Renewed SSL",
|
||||
"type": "SSL",
|
||||
"type_data": {
|
||||
"host": "blog.example.com",
|
||||
"port": "443",
|
||||
"degradedRemainingHours": 504,
|
||||
"downRemainingHours": 168
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Common SSL Ports {#common-ssl-ports}
|
||||
|
||||
| Service | Port | Protocol | Notes |
|
||||
| :------------ | :---- | :-------------- | :---------------------------- |
|
||||
| HTTPS | 443 | HTTP over TLS | Standard web traffic |
|
||||
| SMTPS | 465 | SMTP over TLS | Secure email submission |
|
||||
| SMTP STARTTLS | 587 | SMTP + STARTTLS | Email submission with upgrade |
|
||||
| IMAPS | 993 | IMAP over TLS | Secure email retrieval |
|
||||
| POP3S | 995 | POP3 over TLS | Secure email retrieval |
|
||||
| LDAPS | 636 | LDAP over TLS | Secure directory access |
|
||||
| FTPS | 990 | FTP over TLS | Secure file transfer |
|
||||
| PostgreSQL | 5432 | PostgreSQL | When SSL enabled |
|
||||
| MySQL | 3306 | MySQL | When SSL enabled |
|
||||
| MongoDB | 27017 | MongoDB | When TLS enabled |
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Threshold Selection {#best-practices-thresholds}
|
||||
|
||||
1. **Know your renewal process**: Auto-renewal needs shorter thresholds than manual.
|
||||
2. **Add buffer time**: Account for weekends, holidays, and approval processes.
|
||||
3. **Consider criticality**: Production services need longer warning periods.
|
||||
4. **Align with SLAs**: Match thresholds to your incident response times.
|
||||
|
||||
### Monitoring Strategy {#best-practices-strategy}
|
||||
|
||||
1. **Monitor all endpoints**: Each domain and subdomain may have different certificates.
|
||||
2. **Check non-443 ports**: Don't forget mail servers, databases, and custom apps.
|
||||
3. **Include internal services**: Internal certificates expire too.
|
||||
4. **Set up alerts**: Configure notifications for DEGRADED status.
|
||||
|
||||
### Certificate Management {#best-practices-management}
|
||||
|
||||
1. **Use automation**: Let's Encrypt/ACME for automatic renewal where possible.
|
||||
2. **Track manually renewed certs**: Enterprise CAs need more attention.
|
||||
3. **Document renewal processes**: Know who renews what and how.
|
||||
4. **Test renewal process**: Verify automation works before certificates expire.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :--------------------- | :---------------------------- | :------------------------------------- |
|
||||
| Always DOWN | Wrong hostname or port | Verify host and port are correct |
|
||||
| Connection refused | Firewall blocking connection | Check network/firewall rules |
|
||||
| No certificate found | Server not configured for SSL | Verify SSL is enabled on server |
|
||||
| Wrong certificate | SNI not working | Ensure hostname matches certificate |
|
||||
| Unexpected expiry date | Wrong certificate in chain | Check server certificate configuration |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Check certificate manually**:
|
||||
|
||||
```bash
|
||||
openssl s_client -connect example.com:443 -servername example.com
|
||||
```
|
||||
|
||||
2. **View certificate details**:
|
||||
|
||||
```bash
|
||||
echo | openssl s_client -connect example.com:443 2>/dev/null | openssl x509 -noout -dates
|
||||
```
|
||||
|
||||
3. **Check certificate chain**:
|
||||
|
||||
```bash
|
||||
openssl s_client -connect example.com:443 -showcerts
|
||||
```
|
||||
|
||||
4. **Test from Kener server**: Ensure the Kener host can reach the target.
|
||||
|
||||
### Latency Interpretation {#latency-interpretation}
|
||||
|
||||
SSL check latency includes:
|
||||
|
||||
- DNS resolution time
|
||||
- TCP connection establishment
|
||||
- TLS handshake (certificate exchange)
|
||||
|
||||
Typical latencies:
|
||||
|
||||
| Scenario | Expected Latency |
|
||||
| :-------------- | :--------------- |
|
||||
| Local network | 10-50ms |
|
||||
| Same region | 50-150ms |
|
||||
| Cross-region | 150-300ms |
|
||||
| Cross-continent | 200-500ms |
|
||||
- **Immediate DOWN**: wrong host/port or TLS unavailable
|
||||
- **Validation fails**: ensure degraded threshold is strictly greater than down threshold
|
||||
- **Unexpected expiry result**: verify served certificate/SNI for that host
|
||||
|
||||
@@ -1,571 +1,61 @@
|
||||
---
|
||||
title: TCP Monitor
|
||||
description: Monitor TCP port availability and connectivity with customizable host configurations and evaluation logic
|
||||
description: Monitor TCP port availability for hosts and services
|
||||
---
|
||||
|
||||
TCP monitors check whether services are accepting connections on specific ports by attempting to establish TCP connections. This is ideal for monitoring databases, mail servers, game servers, custom applications, or any network service that listens on a TCP port.
|
||||
TCP monitors test whether target ports are reachable (`open`, `timeout`, or `error`).
|
||||
|
||||
## How TCP Monitoring Works {#how-tcp-monitoring-works}
|
||||
## Minimum setup {#minimum-setup}
|
||||
|
||||
Kener's TCP monitoring follows this workflow:
|
||||
Add at least one host entry:
|
||||
|
||||
1. **Connection Attempt**: For each host in your configuration, Kener creates a TCP socket and attempts to connect to the specified host and port.
|
||||
2. **Measure Latency**: Using high-precision timing (`process.hrtime.bigint()`), Kener measures how long the connection takes, reported in milliseconds.
|
||||
3. **Determine Connection Status**: Each connection attempt results in one of three states:
|
||||
- `open`: Connection established successfully
|
||||
- `timeout`: Connection attempt exceeded the specified timeout
|
||||
- `error`: Connection failed (refused, unreachable, DNS failure, etc.)
|
||||
4. **Aggregate Results**: All connection results are collected into an array.
|
||||
5. **Evaluate Status**: The custom eval function receives the array and determines the final monitor status.
|
||||
- `type`: `IP4` or `IP6`
|
||||
- `host`
|
||||
- `port`
|
||||
- `timeout` in ms
|
||||
|
||||
## Configuration Options {#configuration-options}
|
||||
## Configuration fields {#configuration-fields}
|
||||
|
||||
| Field | Type | Description | Default |
|
||||
| :-------------- | :--------- | :--------------------------------------------------------------------------- | :------------ |
|
||||
| **Hosts** | `array` | Array of TCP host objects to check. Each host can have independent settings. | (Required) |
|
||||
| **Custom Eval** | `function` | A JavaScript function to evaluate connection results and determine status. | Default logic |
|
||||
| Field | Type | Default | Notes |
|
||||
| :-------- | :-------------------------------- | :----------------- | :------- |
|
||||
| `hosts` | `Array<{type,host,port,timeout}>` | one empty host row | Required |
|
||||
| `tcpEval` | `string` (JS function) | built-in default | Optional |
|
||||
|
||||
### Host Object Properties {#host-object-properties}
|
||||
Default host values: `type=IP4`, `port=80`, `timeout=1000`.
|
||||
|
||||
Each host in the `hosts` array supports the following properties:
|
||||
## Default eval behavior {#default-eval}
|
||||
|
||||
| Property | Type | Description | Default |
|
||||
| :---------- | :------- | :----------------------------------------------------------------------------- | :--------- |
|
||||
| **type** | `string` | IP version to use. `"tcp"` for IPv4 or `"IP6"` for IPv6. | `"tcp"` |
|
||||
| **host** | `string` | Hostname or IP address of the target server. | (Required) |
|
||||
| **port** | `number` | TCP port number to connect to (1-65535). | `80` |
|
||||
| **timeout** | `number` | Maximum time in milliseconds to wait for connection before marking as timeout. | `1000` |
|
||||
Default logic:
|
||||
|
||||
## TCP Connection Result Structure {#tcp-connection-result-structure}
|
||||
- **UP** if every host result has `status === "open"`
|
||||
- **DOWN** otherwise
|
||||
- latency = average host latency
|
||||
|
||||
Each TCP connection attempt returns an object with the following structure:
|
||||
## Custom eval contract {#custom-eval-contract}
|
||||
|
||||
Function input:
|
||||
|
||||
- `arrayOfPings` (TCP results)
|
||||
|
||||
Return object:
|
||||
|
||||
```javascript
|
||||
{
|
||||
status: "open", // "open", "timeout", or "error"
|
||||
latency: 45.23, // Connection time in milliseconds (high precision)
|
||||
host: "db.example.com", // Target hostname
|
||||
port: 5432, // Target port
|
||||
type: "tcp" // IP version used ("tcp" for IPv4, "IP6" for IPv6)
|
||||
}
|
||||
{ status: "UP" | "DEGRADED" | "DOWN" | "MAINTENANCE", latency: number }
|
||||
```
|
||||
|
||||
### Status Values Explained {#status-values-explained}
|
||||
|
||||
| Status | Meaning |
|
||||
| :---------- | :--------------------------------------------------------------------------------------------- |
|
||||
| `"open"` | Connection succeeded. The port is accepting connections and the service is likely running. |
|
||||
| `"timeout"` | Connection attempt exceeded the timeout value. Service may be slow or firewall is blocking. |
|
||||
| `"error"` | Connection failed immediately. Port may be closed, host unreachable, or DNS resolution failed. |
|
||||
|
||||
### Connection Timing {#connection-timing}
|
||||
|
||||
Kener uses Node.js `process.hrtime.bigint()` for nanosecond-precision timing, which is then converted to milliseconds. This ensures accurate latency measurements even for very fast local connections.
|
||||
|
||||
## Custom Evaluation {#custom-evaluation}
|
||||
|
||||
The evaluation function allows you to define custom logic for determining monitor status based on TCP connection results from multiple hosts.
|
||||
|
||||
### Function Signature {#function-signature}
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
// Your evaluation logic
|
||||
return {
|
||||
status: "UP" | "DOWN" | "DEGRADED",
|
||||
latency: number
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### Parameters {#parameters}
|
||||
|
||||
- `arrayOfPings` (`array`): Array of TCP connection result objects, one for each host configured.
|
||||
|
||||
### Return Value {#return-value}
|
||||
|
||||
The function can be **synchronous** or **asynchronous** (using `async` or returning a `Promise`). It **must** return (or resolve to) an object with:
|
||||
|
||||
- `status`: `'UP'`, `'DEGRADED'`, `'DOWN'`, or `'MAINTENANCE'`.
|
||||
- `latency`: The latency to record in milliseconds.
|
||||
|
||||
### Default Implementation {#default-implementation}
|
||||
|
||||
Here is the default logic used if you don't provide a custom function:
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
let latencyTotal = arrayOfPings.reduce((acc, ping) => {
|
||||
return acc + ping.latency
|
||||
}, 0)
|
||||
|
||||
let alive = arrayOfPings.reduce((acc, ping) => {
|
||||
return acc && ping.status === "open"
|
||||
}, true)
|
||||
|
||||
return {
|
||||
status: alive ? "UP" : "DOWN",
|
||||
latency: latencyTotal / arrayOfPings.length
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
**Default Behavior:**
|
||||
|
||||
- **UP**: All hosts have `status === "open"`
|
||||
- **DOWN**: Any host has a status other than `"open"` (timeout or error)
|
||||
- **Latency**: Average of all connection times
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
### 1. Basic Single Port Check {#basic-single-port-check}
|
||||
|
||||
Monitor a single web server on port 80.
|
||||
## Example {#example}
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "web-server",
|
||||
"name": "Web Server",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "www.example.com",
|
||||
"port": 80,
|
||||
"timeout": 2000
|
||||
}
|
||||
]
|
||||
"hosts": [{ "type": "IP4", "host": "db.example.com", "port": 5432, "timeout": 2000 }]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Database Server Monitoring {#database-server-monitoring}
|
||||
|
||||
Monitor a PostgreSQL database on its default port.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "postgres-db",
|
||||
"name": "PostgreSQL Database",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "db.example.com",
|
||||
"port": 5432,
|
||||
"timeout": 3000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Multiple Services on Same Host {#multiple-services-same-host}
|
||||
|
||||
Monitor multiple services running on the same server (web, SSH, database).
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "app-server-services",
|
||||
"name": "Application Server",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "app.example.com",
|
||||
"port": 443,
|
||||
"timeout": 2000
|
||||
},
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "app.example.com",
|
||||
"port": 22,
|
||||
"timeout": 2000
|
||||
},
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "app.example.com",
|
||||
"port": 3306,
|
||||
"timeout": 3000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Redis Cluster Monitoring {#redis-cluster-monitoring}
|
||||
|
||||
Monitor all nodes in a Redis cluster.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "redis-cluster",
|
||||
"name": "Redis Cluster",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "redis-node-1.example.com",
|
||||
"port": 6379,
|
||||
"timeout": 1000
|
||||
},
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "redis-node-2.example.com",
|
||||
"port": 6379,
|
||||
"timeout": 1000
|
||||
},
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "redis-node-3.example.com",
|
||||
"port": 6379,
|
||||
"timeout": 1000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. IPv6 Service Monitoring {#ipv6-service-monitoring}
|
||||
|
||||
Monitor a service over IPv6.
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "ipv6-service",
|
||||
"name": "IPv6 Web Server",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "IP6",
|
||||
"host": "2001:db8::1",
|
||||
"port": 443,
|
||||
"timeout": 2000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 6. Game Server Monitoring {#game-server-monitoring}
|
||||
|
||||
Monitor a Minecraft server's default game port (TCP 25565).
|
||||
|
||||
```json
|
||||
{
|
||||
"tag": "minecraft-server",
|
||||
"name": "Minecraft Server",
|
||||
"type": "TCP",
|
||||
"type_data": {
|
||||
"hosts": [
|
||||
{
|
||||
"type": "tcp",
|
||||
"host": "mc.example.com",
|
||||
"port": 25565,
|
||||
"timeout": 5000
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Evaluation Examples {#advanced-evaluation-examples}
|
||||
|
||||
### 7. Quorum-Based Status (Majority Must Be Up) {#quorum-based-status}
|
||||
|
||||
For distributed systems where you need at least a majority of nodes responding.
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
let openCount = 0
|
||||
let latencyTotal = 0
|
||||
|
||||
for (let ping of arrayOfPings) {
|
||||
if (ping.status === "open") {
|
||||
openCount++
|
||||
latencyTotal += ping.latency
|
||||
}
|
||||
}
|
||||
|
||||
const totalHosts = arrayOfPings.length
|
||||
const majority = Math.floor(totalHosts / 2) + 1
|
||||
const avgLatency = openCount > 0 ? latencyTotal / openCount : 0
|
||||
|
||||
if (openCount === totalHosts) {
|
||||
return { status: "UP", latency: avgLatency }
|
||||
} else if (openCount >= majority) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
} else {
|
||||
return { status: "DOWN", latency: avgLatency }
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### 8. Latency Threshold Evaluation {#latency-threshold-evaluation}
|
||||
|
||||
Mark the service as degraded if any host's latency exceeds a threshold, even when every connection succeeds.
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
const LATENCY_THRESHOLD = 100 // milliseconds
|
||||
|
||||
let allOpen = true
|
||||
let latencyTotal = 0
|
||||
let hasHighLatency = false
|
||||
|
||||
for (let ping of arrayOfPings) {
|
||||
if (ping.status !== "open") {
|
||||
allOpen = false
|
||||
} else {
|
||||
latencyTotal += ping.latency
|
||||
if (ping.latency > LATENCY_THRESHOLD) {
|
||||
hasHighLatency = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const avgLatency = latencyTotal / arrayOfPings.length
|
||||
|
||||
if (!allOpen) {
|
||||
return { status: "DOWN", latency: avgLatency }
|
||||
} else if (hasHighLatency) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
} else {
|
||||
return { status: "UP", latency: avgLatency }
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### 9. Primary/Secondary Failover Check {#primary-secondary-failover}
|
||||
|
||||
Check if the primary is up; if not, verify that at least one secondary is accepting connections.
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
// Assume first host is primary, rest are secondaries
|
||||
const primary = arrayOfPings[0]
|
||||
const secondaries = arrayOfPings.slice(1)
|
||||
|
||||
if (primary.status === "open") {
|
||||
// Primary is healthy
|
||||
return { status: "UP", latency: primary.latency }
|
||||
}
|
||||
|
||||
// Primary is down, check secondaries
|
||||
const workingSecondary = secondaries.find((s) => s.status === "open")
|
||||
|
||||
if (workingSecondary) {
|
||||
// Failover is working but primary is down
|
||||
return { status: "DEGRADED", latency: workingSecondary.latency }
|
||||
}
|
||||
|
||||
// Both primary and all secondaries are down
|
||||
return { status: "DOWN", latency: 0 }
|
||||
})
|
||||
```
|
||||
|
||||
### 10. Weighted Service Importance {#weighted-service-importance}
|
||||
|
||||
Different services have different importance levels.
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
// Weights: higher = more important
|
||||
// Order must match hosts array order
|
||||
const weights = [10, 5, 3] // e.g., database, cache, logging
|
||||
|
||||
let totalWeight = 0
|
||||
let healthyWeight = 0
|
||||
let latencyTotal = 0
|
||||
let openCount = 0
|
||||
|
||||
for (let i = 0; i < arrayOfPings.length; i++) {
|
||||
const weight = weights[i] || 1
|
||||
totalWeight += weight
|
||||
|
||||
if (arrayOfPings[i].status === "open") {
|
||||
healthyWeight += weight
|
||||
latencyTotal += arrayOfPings[i].latency
|
||||
openCount++
|
||||
}
|
||||
}
|
||||
|
||||
const healthPercent = healthyWeight / totalWeight
|
||||
const avgLatency = openCount > 0 ? latencyTotal / openCount : 0
|
||||
|
||||
if (healthPercent === 1) {
|
||||
return { status: "UP", latency: avgLatency }
|
||||
} else if (healthPercent >= 0.5) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
} else {
|
||||
return { status: "DOWN", latency: avgLatency }
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### 11. Timeout-Specific Handling {#timeout-specific-handling}
|
||||
|
||||
Differentiate between timeouts (slow) and errors (down).
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
let openCount = 0
|
||||
let timeoutCount = 0
|
||||
let errorCount = 0
|
||||
let latencyTotal = 0
|
||||
|
||||
for (let ping of arrayOfPings) {
|
||||
if (ping.status === "open") {
|
||||
openCount++
|
||||
latencyTotal += ping.latency
|
||||
} else if (ping.status === "timeout") {
|
||||
timeoutCount++
|
||||
} else {
|
||||
errorCount++
|
||||
}
|
||||
}
|
||||
|
||||
const avgLatency = openCount > 0 ? latencyTotal / openCount : 0
|
||||
|
||||
// All connections successful
|
||||
if (openCount === arrayOfPings.length) {
|
||||
return { status: "UP", latency: avgLatency }
|
||||
}
|
||||
|
||||
// Some timeouts but no hard errors - service is slow
|
||||
if (errorCount === 0 && timeoutCount > 0) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
}
|
||||
|
||||
// Mix of states - partial outage
|
||||
if (openCount > 0) {
|
||||
return { status: "DEGRADED", latency: avgLatency }
|
||||
}
|
||||
|
||||
// Complete failure
|
||||
return { status: "DOWN", latency: 0 }
|
||||
})
|
||||
```
|
||||
|
||||
### 12. Percentile-Based Latency Evaluation {#percentile-based-latency}
|
||||
|
||||
Use 95th and 99th percentile latency for status determination.
|
||||
|
||||
```javascript
|
||||
;(async function (arrayOfPings) {
|
||||
const P95_THRESHOLD = 150 // milliseconds
|
||||
const P99_THRESHOLD = 300 // milliseconds
|
||||
|
||||
// Extract latencies from successful connections only
|
||||
const latencies = arrayOfPings
|
||||
.filter((p) => p.status === "open")
|
||||
.map((p) => p.latency)
|
||||
.sort((a, b) => a - b)
|
||||
|
||||
if (latencies.length === 0) {
|
||||
return { status: "DOWN", latency: 0 }
|
||||
}
|
||||
|
||||
// Calculate percentiles
|
||||
const p95Index = Math.floor(latencies.length * 0.95)
|
||||
const p99Index = Math.floor(latencies.length * 0.99)
|
||||
const p95 = latencies[p95Index] || latencies[latencies.length - 1]
|
||||
const p99 = latencies[p99Index] || latencies[latencies.length - 1]
|
||||
const avg = latencies.reduce((a, b) => a + b, 0) / latencies.length
|
||||
|
||||
// Not all hosts responded
|
||||
if (latencies.length < arrayOfPings.length) {
|
||||
return { status: "DEGRADED", latency: avg }
|
||||
}
|
||||
|
||||
// Check percentile thresholds
|
||||
if (p99 > P99_THRESHOLD) {
|
||||
return { status: "DOWN", latency: avg }
|
||||
} else if (p95 > P95_THRESHOLD) {
|
||||
return { status: "DEGRADED", latency: avg }
|
||||
}
|
||||
|
||||
return { status: "UP", latency: avg }
|
||||
})
|
||||
```
|
||||
|
||||
## Common Use Cases {#common-use-cases}
|
||||
|
||||
### Database Servers {#use-case-databases}
|
||||
|
||||
| Database | Default Port | Notes |
|
||||
| :--------- | :----------- | :------------------------- |
|
||||
| PostgreSQL | 5432 | May have multiple replicas |
|
||||
| MySQL | 3306 | Check primary and replicas |
|
||||
| MongoDB | 27017 | Replica set members |
|
||||
| Redis | 6379 | Cluster or sentinel nodes |
|
||||
| Cassandra | 9042 | Multiple nodes in ring |
|
||||
|
||||
### Message Queues {#use-case-message-queues}
|
||||
|
||||
| Service | Default Port | Notes |
|
||||
| :------- | :----------- | :------------------------- |
|
||||
| RabbitMQ | 5672 | AMQP port (15672 for HTTP) |
|
||||
| Kafka | 9092 | Broker port |
|
||||
| ActiveMQ | 61616 | OpenWire protocol |
|
||||
| NATS | 4222 | Client port |
|
||||
|
||||
### Other Services {#use-case-other-services}
|
||||
|
||||
| Service | Default Port | Notes |
|
||||
| :------------ | :----------- | :----------------- |
|
||||
| SSH | 22 | Remote access |
|
||||
| SMTP | 25, 587, 465 | Email sending |
|
||||
| IMAP | 143, 993 | Email receiving |
|
||||
| FTP | 21 | File transfer |
|
||||
| DNS | 53 | Name resolution |
|
||||
| LDAP | 389, 636 | Directory services |
|
||||
| Elasticsearch | 9200, 9300 | HTTP and transport |
|
||||
| Memcached | 11211 | Caching |
|
||||
|
||||
## Best Practices {#best-practices}
|
||||
|
||||
### Timeout Configuration {#best-practices-timeout}
|
||||
|
||||
- **Local services**: 500-1000ms timeout
|
||||
- **Same datacenter**: 1000-2000ms timeout
|
||||
- **Cross-region**: 2000-5000ms timeout
|
||||
- **Global services**: 5000-10000ms timeout
|
||||
|
||||
### Host Configuration {#best-practices-hosts}
|
||||
|
||||
1. **Monitor critical ports**: Focus on the ports that matter for your application's functionality.
|
||||
2. **Include redundancy checks**: Monitor all nodes in clustered services.
|
||||
3. **Consider dependencies**: A database monitor might check both primary and read replicas.
|
||||
4. **Use appropriate IP versions**: Use IPv6 (`"IP6"`) when services are only reachable via IPv6.
|
||||
|
||||
### Evaluation Logic {#best-practices-evaluation}
|
||||
|
||||
1. **Match business requirements**: Define what "UP", "DEGRADED", and "DOWN" mean for your service.
|
||||
2. **Consider partial failures**: Distributed systems may have partial availability.
|
||||
3. **Account for latency**: Slow responses may indicate impending problems.
|
||||
4. **Use quorum logic**: For clustered services, check if enough nodes are healthy.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Common Issues {#common-issues}
|
||||
|
||||
| Issue | Possible Cause | Solution |
|
||||
| :-------------------- | :------------------------------------- | :-------------------------------------------------- |
|
||||
| Always timeout | Firewall blocking connection | Check firewall rules, security groups |
|
||||
| Always error | Service not running or wrong port | Verify service status and port number |
|
||||
| Intermittent timeouts | Network congestion or service overload | Increase timeout or investigate service performance |
|
||||
| IPv6 not working | Network doesn't support IPv6 | Use `"tcp"` type for IPv4 |
|
||||
| High latency | Geographic distance or network issues | Use closer monitoring location or adjust thresholds |
|
||||
|
||||
### Debug Tips {#debug-tips}
|
||||
|
||||
1. **Test manually**: Use `telnet host port` or `nc -zv host port` to verify connectivity.
|
||||
2. **Check DNS**: Ensure hostname resolves correctly.
|
||||
3. **Verify firewall**: Confirm the monitoring server can reach the target.
|
||||
4. **Review logs**: Check Kener logs for detailed error messages.
|
||||
- **Timeout**: network path/firewall issue or slow target
|
||||
- **Error**: wrong host/port or service not listening
|
||||
- **Validation fails**: host type and address format must match
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
---
|
||||
title: Pages
|
||||
description: Create and configure status pages, monitor visibility, and display preferences
|
||||
---
|
||||
|
||||
Use Pages to create separate status views (for example: `home`, `services`, or `infrastructure`) and control which monitors appear on each page.
|
||||
|
||||
## Create a page {#create-a-page}
|
||||
|
||||
1. Open **Manage → Pages**.
|
||||
2. Click **New Page**.
|
||||
3. Fill in the required fields:
|
||||
- **Path** (URL segment)
|
||||
- **Title**
|
||||
- **Header**
|
||||
4. Click **Create Page**.
|
||||
|
||||
> [!NOTE]
|
||||
> The page path is automatically sanitized to a URL-friendly value (lowercase, spaces become `-`).
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The home page (`/`) is created by default. You cannot change its path, and you cannot delete it.
|
||||
|
||||
## General information fields {#general-information-fields}
|
||||
|
||||
| Field | Required | Description |
|
||||
| -------------- | -------- | ----------------------------------------------- |
|
||||
| `Path` | Yes | URL path for the page (for example `services`). |
|
||||
| `Title` | Yes | Browser tab title. |
|
||||
| `Header` | Yes | Main heading shown on the status page. |
|
||||
| `Page Content` | No | Markdown content shown under the header. |
|
||||
| `Page Logo` | No | Optional logo image for the page. |
|
||||
|
||||
## Add monitors to a page {#add-monitors-to-a-page}
|
||||
|
||||
In **Page Monitors**:
|
||||
|
||||
1. Select a monitor from the dropdown.
|
||||
2. Click **Add**.
|
||||
3. Repeat for all monitors you want on this page.
|
||||
|
||||
Remove a monitor by clicking the remove button next to it.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Only monitors added to a page are shown on that page.
|
||||
|
||||
## Display settings {#display-settings}
|
||||
|
||||
Each page has its own display preferences.
|
||||
|
||||
### Monitor status history days {#monitor-status-history-days}
|
||||
|
||||
- **Desktop days**: how many days of status history to show on desktop.
|
||||
- **Mobile days**: how many days of status history to show on mobile.
|
||||
|
||||
### Monitor layout style {#monitor-layout-style}
|
||||
|
||||
Choose one layout:
|
||||
|
||||
- `default-list`
|
||||
- `default-grid`
|
||||
- `compact-list`
|
||||
- `compact-grid`
|
||||
|
||||
Click **Save Preferences** after changes.
|
||||
|
||||
## Delete a page {#delete-a-page}
|
||||
|
||||
Non-home pages can be deleted from the **Danger Zone** section.
|
||||
|
||||
To confirm deletion, type:
|
||||
|
||||
```text
|
||||
delete <page_path>
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
delete services
|
||||
```
|
||||
|
||||
## Tips {#tips}
|
||||
|
||||
- Keep page paths short and stable (changing links later is disruptive).
|
||||
- Create pages by audience (for example: public services vs internal systems).
|
||||
- Add only relevant monitors per page to keep status pages readable.
|
||||
- Use [Sharing Monitors](/docs/v4/sharing) to control badge/embed visibility per monitor.
|
||||
@@ -0,0 +1,61 @@
|
||||
---
|
||||
title: Site Customizations
|
||||
description: Configure footer, colors, fonts, theme, announcements, and custom CSS behavior
|
||||
---
|
||||
|
||||
Use **Manage → Customizations** to control visual behavior of the public status page.
|
||||
|
||||
## What you can customize {#what-you-can-customize}
|
||||
|
||||
- Site footer HTML
|
||||
- Status colors (light + dark)
|
||||
- Font source and font family
|
||||
- Default theme and theme toggle
|
||||
- Site-wide announcement banner
|
||||
- Custom CSS (stored now, injection pending)
|
||||
|
||||
## Runtime impact map {#runtime-impact-map}
|
||||
|
||||
| Customization | Stored key | Runtime impact |
|
||||
| -------------- | --------------------------- | ------------------------------------------------------------- |
|
||||
| Footer | `footerHTML` | Rendered by public footer component |
|
||||
| Status colors | `colors`, `colorsDark` | Injected as CSS variables in public layout `<head>` |
|
||||
| Font | `font` (`cssSrc`, `family`) | Loads font stylesheet and applies global `--font-family` |
|
||||
| Theme defaults | `theme`, `themeToggle` | Sets initial mode and controls whether users can toggle theme |
|
||||
| Announcement | `announcement` | Renders site banner when title + message are present |
|
||||
| Custom CSS | `customCSS` | Saved in site data, not yet injected into public layout |
|
||||
|
||||
## Footer HTML {#footer-html}
|
||||
|
||||
Footer content is rendered as HTML on the public page.
|
||||
|
||||
> [!CAUTION]
|
||||
> Keep footer HTML trusted and minimal. It is rendered directly.
|
||||
|
||||
## Colors and fonts {#colors-and-fonts}
|
||||
|
||||
Color settings feed CSS variables for status/accent colors in the public layout. Font settings:
|
||||
|
||||
- load `font.cssSrc` as stylesheet
|
||||
- set `--font-family`
|
||||
- apply global font family
|
||||
|
||||
## Theme and announcement {#theme-and-announcement}
|
||||
|
||||
- `theme` sets default mode (`light`, `dark`, `system`)
|
||||
- `themeToggle` controls whether users can switch theme in UI
|
||||
- announcement banner appears when both `title` and `message` are set
|
||||
|
||||
## Custom CSS status {#custom-css-status}
|
||||
|
||||
`customCSS` is already persisted through site data APIs, but public layout injection is not enabled yet.
|
||||
|
||||
Planned approach: append a `<style>` block in public layout head using saved `customCSS` (after validation/sanitization policy is finalized).
|
||||
|
||||
## Verify changes {#verify-changes}
|
||||
|
||||
- Update footer and refresh page.
|
||||
- Change color values and confirm status/accent colors update.
|
||||
- Set font URL/family and confirm typography changes.
|
||||
- Toggle theme settings and verify theme toggle visibility.
|
||||
- Create announcement and verify banner appears.
|
||||
@@ -3,313 +3,109 @@ title: Database Setup
|
||||
description: Configure SQLite, PostgreSQL, or MySQL database for Kener
|
||||
---
|
||||
|
||||
Kener uses a relational database to store monitors, incidents, subscriptions, and other data. It supports three database systems: SQLite, PostgreSQL, and MySQL.
|
||||
Kener stores monitor data, incidents, and subscriptions in a relational database.
|
||||
|
||||
## Supported Databases {#supported-databases}
|
||||
Supported databases:
|
||||
|
||||
- **SQLite** (default) - Zero configuration, file-based database
|
||||
- **PostgreSQL** - Enterprise-grade relational database
|
||||
- **MySQL** - Popular open-source database
|
||||
- **SQLite** (default)
|
||||
- **PostgreSQL** (recommended for production)
|
||||
- **MySQL**
|
||||
|
||||
## Default Configuration {#default-configuration}
|
||||
## Quick configuration {#quick-configuration}
|
||||
|
||||
Kener uses **SQLite** as the default database. No configuration is required to get started.
|
||||
|
||||
### Default Settings {#default-settings}
|
||||
|
||||
- **Database Type**: SQLite
|
||||
- **Database File**: `./database/kener.sqlite.db`
|
||||
- **Auto-migration**: Enabled on startup
|
||||
|
||||
The database file is automatically created in the `database` folder in your project root when you first start Kener.
|
||||
|
||||
## Database Configuration {#database-configuration}
|
||||
|
||||
All database configuration is done through the `DATABASE_URL` environment variable. The connection string format determines which database system Kener will use.
|
||||
|
||||
### Connection String Format {#connection-string-format}
|
||||
|
||||
```
|
||||
<database-type>://<connection-details>
|
||||
```
|
||||
|
||||
Kener automatically detects the database type from the URL prefix:
|
||||
|
||||
- `sqlite://` → SQLite
|
||||
- `postgresql://` → PostgreSQL
|
||||
- `mysql://` → MySQL
|
||||
|
||||
## SQLite Configuration {#sqlite-configuration}
|
||||
|
||||
SQLite is the simplest option and requires no external database server.
|
||||
|
||||
### Minimum Configuration {#sqlite-minimum}
|
||||
|
||||
**No configuration needed!** Kener will automatically use SQLite with default settings.
|
||||
|
||||
### Custom SQLite Path {#sqlite-custom-path}
|
||||
|
||||
To use a custom database file location:
|
||||
Set `DATABASE_URL` in `.env`.
|
||||
|
||||
```env
|
||||
DATABASE_URL=sqlite://./database/my-kener.db
|
||||
# Default (if omitted)
|
||||
DATABASE_URL=sqlite://./database/kener.sqlite.db
|
||||
|
||||
# PostgreSQL
|
||||
DATABASE_URL=postgresql://user:password@host:5432/kener
|
||||
|
||||
# MySQL
|
||||
DATABASE_URL=mysql://user:password@host:3306/kener
|
||||
```
|
||||
|
||||
Or store it in a different directory:
|
||||
Kener detects the database from the URL prefix:
|
||||
|
||||
- `sqlite://`
|
||||
- `postgresql://`
|
||||
- `mysql://`
|
||||
|
||||
## Minimum server sizing (CPU/RAM) {#minimum-server-sizing}
|
||||
|
||||
> [!NOTE]
|
||||
> Based on practical Kener v4 deployment patterns (app + Redis + relational DB). If you scale monitor count or retention, scale resources accordingly.
|
||||
|
||||
### Kener app instance {#app-instance-sizing}
|
||||
|
||||
| Workload | vCPU | RAM | Typical use |
|
||||
| ----------- | ---- | ----- | -------------------------------------- |
|
||||
| Minimum | 1 | 512MB | Testing, small personal setup |
|
||||
| Recommended | 1 | 1GB | Small production (up to ~100 monitors) |
|
||||
| Higher load | 2 | 2GB+ | 100+ monitors, higher request volume |
|
||||
|
||||
### Database service {#database-service-sizing}
|
||||
|
||||
| Database | Minimum | Recommended |
|
||||
| ---------- | ----------------- | ---------------------------- |
|
||||
| SQLite | Uses app disk/RAM | SSD storage, regular backups |
|
||||
| PostgreSQL | 1 vCPU / 512MB | 1 vCPU / 1-2GB |
|
||||
| MySQL | 1 vCPU / 512MB | 1 vCPU / 1-2GB |
|
||||
|
||||
## SQLite {#sqlite}
|
||||
|
||||
Good for single-instance and simple deployments.
|
||||
|
||||
```env
|
||||
DATABASE_URL=sqlite://./database/kener.sqlite.db
|
||||
```
|
||||
|
||||
Custom path example:
|
||||
|
||||
```env
|
||||
DATABASE_URL=sqlite:///var/lib/kener/kener.db
|
||||
```
|
||||
|
||||
> **Note:** Ensure the directory exists and the Kener process has write permissions.
|
||||
## PostgreSQL {#postgresql}
|
||||
|
||||
### SQLite Requirements {#sqlite-requirements}
|
||||
|
||||
- ✅ No external database server needed
|
||||
- ✅ No additional dependencies
|
||||
- ✅ Automatic file creation
|
||||
- ⚠️ Directory must be writable by Kener process
|
||||
|
||||
## PostgreSQL Configuration {#postgresql-configuration}
|
||||
|
||||
PostgreSQL is recommended for production deployments with high traffic or multiple instances.
|
||||
|
||||
### Minimum Configuration {#postgresql-minimum}
|
||||
Recommended for production and multi-instance deployments.
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://username:password@localhost:5432/kener
|
||||
DATABASE_URL=postgresql://kener:password@localhost:5432/kener
|
||||
```
|
||||
|
||||
### Connection String Format {#postgresql-format}
|
||||
|
||||
```
|
||||
postgresql://[user]:[password]@[host]:[port]/[database]
|
||||
```
|
||||
|
||||
### Example Configurations {#postgresql-examples}
|
||||
|
||||
**Local PostgreSQL:**
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://kener:secretpassword@localhost:5432/kenerdb
|
||||
```
|
||||
|
||||
**Remote PostgreSQL:**
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://dbuser:dbpass@db.example.com:5432/kener
|
||||
```
|
||||
|
||||
**PostgreSQL with SSL:**
|
||||
With SSL:
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://user:pass@host:5432/kener?sslmode=require
|
||||
```
|
||||
|
||||
**Cloud PostgreSQL (e.g., Supabase, Neon, Railway):**
|
||||
## MySQL {#mysql}
|
||||
|
||||
Use MySQL when MySQL/MariaDB is already your standard stack.
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://user:pass@db.project.supabase.co:5432/postgres?sslmode=require
|
||||
DATABASE_URL=mysql://kener:password@localhost:3306/kener
|
||||
```
|
||||
|
||||
### PostgreSQL Requirements {#postgresql-requirements}
|
||||
## Switching databases {#switching-databases}
|
||||
|
||||
- ✅ PostgreSQL 12 or higher
|
||||
- ✅ Database must exist before starting Kener
|
||||
- ✅ User must have CREATE and ALTER privileges for migrations
|
||||
- ✅ Tables will be created automatically on first run
|
||||
1. Back up or export your data.
|
||||
2. Update `DATABASE_URL`.
|
||||
3. Restart Kener.
|
||||
|
||||
## MySQL Configuration {#mysql-configuration}
|
||||
|
||||
MySQL is a popular choice for many hosting environments.
|
||||
|
||||
### Minimum Configuration {#mysql-minimum}
|
||||
|
||||
```env
|
||||
DATABASE_URL=mysql://username:password@localhost:3306/kener
|
||||
```
|
||||
|
||||
### Connection String Format {#mysql-format}
|
||||
|
||||
```
|
||||
mysql://[user]:[password]@[host]:[port]/[database]
|
||||
```
|
||||
|
||||
### Example Configurations {#mysql-examples}
|
||||
|
||||
**Local MySQL:**
|
||||
|
||||
```env
|
||||
DATABASE_URL=mysql://root:password@localhost:3306/kenerdb
|
||||
```
|
||||
|
||||
**Remote MySQL:**
|
||||
|
||||
```env
|
||||
DATABASE_URL=mysql://dbuser:dbpass@db.example.com:3306/kener
|
||||
```
|
||||
|
||||
**MySQL with options:**
|
||||
|
||||
```env
|
||||
DATABASE_URL=mysql://user:pass@host:3306/kener?charset=utf8mb4
|
||||
```
|
||||
|
||||
### MySQL Requirements {#mysql-requirements}
|
||||
|
||||
- ✅ MySQL 5.7 or higher (or MariaDB 10.2+)
|
||||
- ✅ Database must exist before starting Kener
|
||||
- ✅ User must have CREATE and ALTER privileges for migrations
|
||||
- ✅ Tables will be created automatically on first run
|
||||
|
||||
## Database Migrations {#database-migrations}
|
||||
|
||||
Kener uses migrations to manage database schema changes. Migrations run automatically when you start Kener, but you can also run them manually.
|
||||
|
||||
### Automatic Migrations {#automatic-migrations}
|
||||
|
||||
By default, Kener automatically runs migrations on startup:
|
||||
|
||||
```bash
|
||||
npm start
|
||||
```
|
||||
|
||||
This ensures your database schema is always up to date.
|
||||
|
||||
### Manual Migrations {#manual-migrations}
|
||||
|
||||
To run migrations manually:
|
||||
|
||||
```bash
|
||||
npm run migrate
|
||||
```
|
||||
|
||||
This is useful for:
|
||||
|
||||
- Debugging migration issues
|
||||
- Pre-applying migrations in CI/CD pipelines
|
||||
- Separating migration execution from application startup
|
||||
|
||||
## Switching Databases {#switching-databases}
|
||||
|
||||
To switch from one database to another:
|
||||
|
||||
1. **Export your data** (if needed)
|
||||
2. **Update `DATABASE_URL`** in your `.env` file
|
||||
3. **Restart Kener** - migrations will run automatically
|
||||
4. **Reconfigure monitors and settings**
|
||||
|
||||
> **Warning:** Switching databases does not migrate data. You'll need to set up monitors and configurations again, or manually migrate data between databases.
|
||||
|
||||
## Production Recommendations {#production-recommendations}
|
||||
|
||||
### For Small to Medium Deployments {#small-medium}
|
||||
|
||||
**SQLite** is perfectly fine for:
|
||||
|
||||
- Single-instance deployments
|
||||
- Up to 100 monitors
|
||||
- Low to medium traffic
|
||||
- Simple setup requirements
|
||||
|
||||
### For Large or Multi-Instance Deployments {#large-deployments}
|
||||
|
||||
Use **PostgreSQL** or **MySQL** for:
|
||||
|
||||
- Multiple Kener instances
|
||||
- High availability setups
|
||||
- 100+ monitors
|
||||
- High traffic scenarios
|
||||
- Cloud deployments
|
||||
|
||||
### Performance Tips {#performance-tips}
|
||||
|
||||
1. **PostgreSQL**: Enable connection pooling for better performance
|
||||
2. **MySQL**: Use InnoDB engine (default)
|
||||
3. **SQLite**: Store database on SSD for faster I/O
|
||||
4. **All databases**: Regular backups are essential
|
||||
> [!WARNING]
|
||||
> Switching databases does **not** migrate existing data automatically.
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Database connection failed {#connection-failed}
|
||||
- Connection failed: verify host, port, credentials, firewall.
|
||||
- Migration failed: ensure DB exists and user can `CREATE`/`ALTER`.
|
||||
- SQLite write error: ensure directory exists and is writable.
|
||||
|
||||
1. Verify `DATABASE_URL` is correctly formatted
|
||||
2. Ensure database server is running (PostgreSQL/MySQL)
|
||||
3. Check username and password are correct
|
||||
4. Verify database exists and user has permissions
|
||||
5. Check firewall rules allow connection
|
||||
|
||||
### Migration errors {#migration-errors}
|
||||
|
||||
1. Check database user has CREATE and ALTER privileges
|
||||
2. Ensure database exists before running migrations
|
||||
3. Try running migrations manually: `npm run migrate`
|
||||
4. Check migration logs for specific errors
|
||||
|
||||
### SQLite permissions error {#sqlite-permissions}
|
||||
|
||||
1. Ensure `database` folder exists
|
||||
2. Check folder is writable: `chmod 755 database`
|
||||
3. Verify Kener process user has write permissions
|
||||
|
||||
### PostgreSQL/MySQL "database does not exist" {#database-not-exist}
|
||||
|
||||
Create the database before starting Kener:
|
||||
|
||||
**PostgreSQL:**
|
||||
|
||||
```sql
|
||||
CREATE DATABASE kener;
|
||||
```
|
||||
|
||||
**MySQL:**
|
||||
|
||||
```sql
|
||||
CREATE DATABASE kener CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
||||
```
|
||||
|
||||
## Environment Variables Summary {#environment-variables}
|
||||
## Environment variables {#environment-variables}
|
||||
|
||||
| Variable | Description | Default | Required |
|
||||
| -------------- | -------------------------- | ------------------------------------- | -------- |
|
||||
| `DATABASE_URL` | Database connection string | `sqlite://./database/kener.sqlite.db` | No |
|
||||
|
||||
## Examples by Platform {#platform-examples}
|
||||
|
||||
### Docker Compose with PostgreSQL {#docker-postgres}
|
||||
|
||||
```yaml
|
||||
version: "3.8"
|
||||
services:
|
||||
kener:
|
||||
image: rajnandan1/kener
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://kener:password@postgres:5432/kener
|
||||
depends_on:
|
||||
- postgres
|
||||
|
||||
postgres:
|
||||
image: postgres:15
|
||||
environment:
|
||||
- POSTGRES_DB=kener
|
||||
- POSTGRES_USER=kener
|
||||
- POSTGRES_PASSWORD=password
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
```
|
||||
|
||||
### Railway with PostgreSQL {#railway}
|
||||
|
||||
Railway automatically provides a `DATABASE_URL` environment variable when you add a PostgreSQL database. Simply use the provided URL.
|
||||
|
||||
### Vercel with Supabase {#vercel-supabase}
|
||||
|
||||
1. Create a Supabase project
|
||||
2. Get the connection string from Supabase dashboard
|
||||
3. Add to Vercel environment variables:
|
||||
```
|
||||
DATABASE_URL=postgresql://postgres:[password]@db.[project].supabase.co:5432/postgres?sslmode=require
|
||||
```
|
||||
|
||||
@@ -115,6 +115,9 @@ SMTP_SENDER=noreply@yourdomain.com
|
||||
3. Ensure sender email is authorized by your SMTP provider
|
||||
4. Check server logs for detailed error messages
|
||||
|
||||
> [!TIP]
|
||||
> User invitations and verification emails depend on email setup. See [User Management](/docs/v4/user-management).
|
||||
|
||||
### Wrong port/SSL configuration {#wrong-port-ssl}
|
||||
|
||||
If you're getting connection errors, verify:
|
||||
|
||||
@@ -3,253 +3,61 @@ title: Redis Setup
|
||||
description: Configure Redis for queues, caching, and scheduling in Kener
|
||||
---
|
||||
|
||||
Redis is a **required component** for Kener, providing background job processing, caching, and distributed scheduling capabilities. Redis powers Kener's core infrastructure for queues, caching, and scheduling.
|
||||
Redis is **required**. Kener will not start without `REDIS_URL`.
|
||||
|
||||
## What Redis Does {#what-redis-does}
|
||||
## Quick configuration {#quick-configuration}
|
||||
|
||||
Redis powers three critical systems in Kener:
|
||||
|
||||
### 1. Queue System {#queue-system}
|
||||
|
||||
Uses [BullMQ](https://docs.bullmq.io/) to process background jobs:
|
||||
|
||||
- **Monitor execution queue** - Runs monitor checks
|
||||
- **Monitor response queue** - Processes monitoring results
|
||||
- **Alerting queue** - Handles alert notifications
|
||||
- **Subscriber queue** - Manages subscription triggers
|
||||
- **Email queue** - Sends email notifications
|
||||
|
||||
### 2. Caching Layer {#caching-layer}
|
||||
|
||||
Caches frequently accessed data to reduce database load:
|
||||
|
||||
- Site configuration data
|
||||
- Monitor metadata
|
||||
- Documentation search index
|
||||
- API responses
|
||||
|
||||
### 3. Scheduler {#scheduler}
|
||||
|
||||
Manages distributed cron-based monitoring:
|
||||
|
||||
- **Monitor schedulers** - Execute monitors based on cron expressions
|
||||
- **App scheduler** - Handles recurring application tasks
|
||||
- **Maintenance scheduler** - Manages maintenance windows
|
||||
|
||||
> **Important:** Redis is required for Kener to function. Without Redis, the application will not start.
|
||||
|
||||
## Configuration {#configuration}
|
||||
|
||||
Redis is configured using a single environment variable: `REDIS_URL`.
|
||||
|
||||
### Environment Variable {#environment-variable}
|
||||
Add this to `.env`:
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://localhost:6379
|
||||
```
|
||||
|
||||
### Connection String Format {#connection-string-format}
|
||||
Use `rediss://` when your provider requires TLS.
|
||||
|
||||
#### Standard Redis {#standard-redis}
|
||||
## Minimum server sizing (CPU/RAM) {#minimum-server-sizing}
|
||||
|
||||
```
|
||||
> [!NOTE]
|
||||
> Based on practical Kener v4 deployments (app + Redis + relational DB). If monitor count and alert volume grow, scale up.
|
||||
|
||||
### Redis service sizing {#redis-service-sizing}
|
||||
|
||||
| Workload | vCPU | RAM |
|
||||
| ----------- | -------------- | ------ |
|
||||
| Minimum | shared / 0.25+ | 128MB |
|
||||
| Recommended | 0.5+ | 256MB |
|
||||
| Higher load | 1+ | 512MB+ |
|
||||
|
||||
### Kener app sizing impact (with Redis enabled) {#app-sizing-impact}
|
||||
|
||||
| Workload | vCPU | RAM |
|
||||
| ----------- | ---- | ----- |
|
||||
| Minimum | 1 | 512MB |
|
||||
| Recommended | 1 | 1GB |
|
||||
| Higher load | 2+ | 2GB+ |
|
||||
|
||||
## Connection formats {#connection-formats}
|
||||
|
||||
```text
|
||||
redis://[username:password@]host[:port][/database]
|
||||
```
|
||||
|
||||
#### Redis with TLS {#redis-tls}
|
||||
|
||||
```
|
||||
rediss://[username:password@]host[:port][/database]
|
||||
```
|
||||
|
||||
The extra `s` in `rediss://` enables TLS/SSL encryption.
|
||||
|
||||
## Setup Examples {#setup-examples}
|
||||
|
||||
### Local Redis {#local-redis}
|
||||
|
||||
**Basic connection:**
|
||||
Examples:
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://localhost:6379
|
||||
REDIS_URL=redis://:password@localhost:6379
|
||||
REDIS_URL=rediss://default:password@your-endpoint:6379
|
||||
```
|
||||
|
||||
**With authentication:**
|
||||
## Requirements {#requirements}
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://:your-password@localhost:6379
|
||||
```
|
||||
- Redis **6.0+** (7.0+ recommended)
|
||||
- Standard read/write commands enabled
|
||||
- Network access from Kener to Redis host/port
|
||||
|
||||
**Specific database:**
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://localhost:6379/2
|
||||
```
|
||||
|
||||
### Docker Redis {#docker-redis}
|
||||
|
||||
**Docker Compose with Redis:**
|
||||
|
||||
```yaml
|
||||
version: "3.8"
|
||||
services:
|
||||
kener:
|
||||
image: rajnandan1/kener
|
||||
environment:
|
||||
- REDIS_URL=redis://redis:6379
|
||||
depends_on:
|
||||
- redis
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --appendonly yes
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
|
||||
volumes:
|
||||
redis_data:
|
||||
```
|
||||
|
||||
### Cloud Redis Services {#cloud-redis}
|
||||
|
||||
#### Upstash Redis {#upstash}
|
||||
|
||||
```env
|
||||
REDIS_URL=rediss://default:your-password@your-endpoint.upstash.io:6379
|
||||
```
|
||||
|
||||
#### Redis Cloud {#redis-cloud}
|
||||
|
||||
```env
|
||||
REDIS_URL=rediss://default:your-password@redis-12345.cloud.redislabs.com:12345
|
||||
```
|
||||
|
||||
#### AWS ElastiCache {#elasticache}
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://your-cluster.cache.amazonaws.com:6379
|
||||
```
|
||||
|
||||
#### Azure Cache for Redis {#azure-redis}
|
||||
|
||||
```env
|
||||
REDIS_URL=rediss://:your-password@your-cache.redis.cache.windows.net:6380
|
||||
```
|
||||
|
||||
#### Railway Redis {#railway-redis}
|
||||
|
||||
Railway automatically provides `REDIS_URL` when you add a Redis service. Use the provided URL directly.
|
||||
|
||||
#### Render Redis {#render-redis}
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://red-xxxxx:6379
|
||||
```
|
||||
|
||||
## Minimum Requirements {#minimum-requirements}
|
||||
|
||||
### Redis Version {#redis-version}
|
||||
|
||||
- **Minimum**: Redis 6.0+
|
||||
- **Recommended**: Redis 7.0+
|
||||
|
||||
### Memory {#memory}
|
||||
|
||||
- **Small deployments** (< 50 monitors): 128 MB
|
||||
- **Medium deployments** (50-200 monitors): 256 MB
|
||||
- **Large deployments** (200+ monitors): 512 MB+
|
||||
|
||||
### Commands Required {#commands-required}
|
||||
|
||||
Kener requires these Redis commands to be available:
|
||||
|
||||
- `GET`, `SET`, `DEL` - Basic operations
|
||||
- `EXPIRE`, `TTL` - For cache expiration
|
||||
- `LPUSH`, `RPOP`, `BRPOP` - For queue operations
|
||||
- `ZADD`, `ZRANGE`, `ZREM` - For scheduled jobs
|
||||
|
||||
> Most Redis services support all these commands by default. However, some managed services (like AWS ElastiCache with cluster mode) may have restrictions.
|
||||
|
||||
## Installation Options {#installation-options}
|
||||
|
||||
### Option 1: Local Redis (Development) {#local-installation}
|
||||
|
||||
**macOS (Homebrew):**
|
||||
|
||||
```bash
|
||||
brew install redis
|
||||
brew services start redis
|
||||
```
|
||||
|
||||
**Ubuntu/Debian:**
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install redis-server
|
||||
sudo systemctl start redis-server
|
||||
sudo systemctl enable redis-server
|
||||
```
|
||||
|
||||
**Docker:**
|
||||
|
||||
```bash
|
||||
docker run -d -p 6379:6379 redis:7-alpine
|
||||
```
|
||||
|
||||
### Option 2: Cloud Services (Production) {#cloud-installation}
|
||||
|
||||
For production, use managed Redis services:
|
||||
|
||||
- **Upstash** - Serverless Redis, generous free tier
|
||||
- **Redis Cloud** - Official managed Redis
|
||||
- **AWS ElastiCache** - AWS managed Redis
|
||||
- **Azure Cache** - Azure managed Redis
|
||||
- **Railway** - Simplified deployment platform
|
||||
|
||||
## Queue Configuration {#queue-configuration}
|
||||
|
||||
Kener automatically creates these queues in Redis:
|
||||
|
||||
| Queue Name | Purpose | Concurrency |
|
||||
| --------------------------- | -------------------------- | ------------ |
|
||||
| `monitorExecuteQueue` | Execute monitor checks | 5 |
|
||||
| `monitorResponseQueue` | Process monitor results | 5 |
|
||||
| `alertingQueue` | Send alerts | 5 |
|
||||
| `subscriberQueue` | Process subscriptions | 5 |
|
||||
| `emailQueue` | Send individual emails | 5 |
|
||||
| `monitorScheduleQueue` | Schedule monitor execution | Configurable |
|
||||
| `appSchedulerQueue` | Application-level tasks | 1 |
|
||||
| `maintenanceSchedulerQueue` | Maintenance windows | 1 |
|
||||
|
||||
All queues use the prefix `kener:` in Redis keys.
|
||||
|
||||
### Queue Features {#queue-features}
|
||||
|
||||
- **Automatic retry**: Failed jobs retry up to 3 times with exponential backoff
|
||||
- **Job cleanup**: Completed jobs are automatically removed
|
||||
- **Persistence**: Failed jobs are kept for debugging
|
||||
- **Concurrency**: Multiple workers process jobs in parallel
|
||||
|
||||
## Cache Configuration {#cache-configuration}
|
||||
|
||||
Kener uses Redis for caching with these defaults:
|
||||
|
||||
- **Key prefix**: `kener:cache:`
|
||||
- **Default TTL**: 300 seconds (5 minutes)
|
||||
- **Storage format**: JSON
|
||||
|
||||
### Cached Data {#cached-data}
|
||||
|
||||
- Site configuration and settings
|
||||
- Monitor metadata and status
|
||||
- Documentation search index
|
||||
- API response data
|
||||
|
||||
## Monitoring Redis {#monitoring-redis}
|
||||
|
||||
### Check Redis Connection {#check-connection}
|
||||
|
||||
Use the Redis CLI to verify connectivity:
|
||||
## Verify connection {#verify-connection}
|
||||
|
||||
```bash
|
||||
redis-cli -u $REDIS_URL ping
|
||||
@@ -257,151 +65,28 @@ redis-cli -u $REDIS_URL ping
|
||||
|
||||
Expected response: `PONG`
|
||||
|
||||
### View Queue Status {#view-queue-status}
|
||||
|
||||
Check active queues:
|
||||
|
||||
```bash
|
||||
redis-cli -u $REDIS_URL --scan --pattern "kener:*"
|
||||
```
|
||||
|
||||
### Monitor Memory Usage {#monitor-memory}
|
||||
|
||||
```bash
|
||||
redis-cli -u $REDIS_URL INFO memory
|
||||
```
|
||||
|
||||
## Troubleshooting {#troubleshooting}
|
||||
|
||||
### Connection Failed {#connection-failed}
|
||||
- `REDIS_URL is not defined`: add `REDIS_URL` to `.env` and restart.
|
||||
- `NOAUTH Authentication required`: include password in the URL.
|
||||
- Connection timeout: verify host/port/firewall and Redis service status.
|
||||
- `READONLY`: connect to primary/writer instance, not a read replica.
|
||||
|
||||
**Error:** `REDIS_URL is not defined in environment variables`
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Verify `REDIS_URL` is set in `.env` file
|
||||
2. Ensure connection string format is correct
|
||||
3. Restart Kener after adding the variable
|
||||
|
||||
### Authentication Failed {#authentication-failed}
|
||||
|
||||
**Error:** `NOAUTH Authentication required`
|
||||
|
||||
**Solution:**
|
||||
Include password in connection string:
|
||||
|
||||
```env
|
||||
REDIS_URL=redis://:your-password@host:6379
|
||||
```
|
||||
|
||||
### TLS/SSL Issues {#tls-issues}
|
||||
|
||||
**Error:** `Error: self signed certificate`
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Use `rediss://` protocol for TLS connections
|
||||
2. Verify your Redis service requires/supports TLS
|
||||
3. For self-signed certificates, you may need to configure Node.js to accept them (not recommended for production)
|
||||
|
||||
### Connection Timeout {#connection-timeout}
|
||||
|
||||
**Error:** `Connection timeout`
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Verify Redis server is running
|
||||
2. Check firewall rules allow connection
|
||||
3. Ensure host and port are correct
|
||||
4. Test connection with `redis-cli`
|
||||
|
||||
### Too Many Connections {#too-many-connections}
|
||||
|
||||
**Error:** `ERR max number of clients reached`
|
||||
|
||||
**Solution:**
|
||||
|
||||
1. Increase Redis `maxclients` setting
|
||||
2. Review connection pooling
|
||||
3. Check for connection leaks in application logs
|
||||
|
||||
### Commands Not Allowed {#commands-not-allowed}
|
||||
|
||||
**Error:** `READONLY You can't write against a read only replica`
|
||||
|
||||
**Solution:**
|
||||
Ensure you're connecting to the master/primary Redis instance, not a read replica.
|
||||
|
||||
## Performance Optimization {#performance-optimization}
|
||||
|
||||
### For Small Deployments {#small-optimization}
|
||||
|
||||
```env
|
||||
# Standard Redis with minimal config
|
||||
REDIS_URL=redis://localhost:6379
|
||||
```
|
||||
|
||||
### For Large Deployments {#large-optimization}
|
||||
|
||||
**Consider:**
|
||||
|
||||
1. **Redis Cluster** for horizontal scaling
|
||||
2. **Persistent storage** with AOF or RDB snapshots
|
||||
3. **Connection pooling** (handled automatically by ioredis)
|
||||
4. **Monitoring** with Redis insights or external tools
|
||||
|
||||
### Memory Management {#memory-management}
|
||||
|
||||
Kener automatically manages Redis memory with:
|
||||
|
||||
- **TTL on cache keys** (5 minutes default)
|
||||
- **Job cleanup** (completed jobs removed automatically)
|
||||
- **Efficient serialization** (JSON format)
|
||||
|
||||
For manual cleanup:
|
||||
|
||||
```bash
|
||||
# Clear all Kener cache
|
||||
redis-cli -u $REDIS_URL --scan --pattern "kener:cache:*" | xargs redis-cli -u $REDIS_URL DEL
|
||||
|
||||
# Clear specific queue
|
||||
redis-cli -u $REDIS_URL --scan --pattern "kener:monitorExecuteQueue:*" | xargs redis-cli -u $REDIS_URL DEL
|
||||
```
|
||||
|
||||
## Security Best Practices {#security-best-practices}
|
||||
|
||||
1. **Use authentication**: Always set a password on Redis
|
||||
2. **Enable TLS**: Use `rediss://` for encrypted connections
|
||||
3. **Restrict access**: Configure firewall to allow only Kener server
|
||||
4. **Regular backups**: Enable Redis persistence (AOF/RDB)
|
||||
5. **Monitor access**: Review Redis logs for suspicious activity
|
||||
6. **Update regularly**: Keep Redis version up to date
|
||||
|
||||
## Environment Variables Summary {#environment-variables}
|
||||
## Environment variables {#environment-variables}
|
||||
|
||||
| Variable | Description | Default | Required |
|
||||
| ----------- | ----------------------- | ------- | -------- |
|
||||
| `REDIS_URL` | Redis connection string | None | **Yes** |
|
||||
|
||||
## Migration Guide {#migration-guide}
|
||||
## Free Redis SaaS options (indie-friendly) {#free-redis-saas-options}
|
||||
|
||||
### Changing Redis Instances {#changing-instances}
|
||||
If you want a managed Redis/Valkey service without running your own server:
|
||||
|
||||
1. **Stop Kener**
|
||||
2. **Update `REDIS_URL`** in `.env` file
|
||||
3. **Start Kener** with new Redis instance
|
||||
| Provider | Free option | Best for | Quick notes |
|
||||
| -------------------------------------------------------- | ------------------------------------- | ---------------------------------- | ------------------------------------------------------- |
|
||||
| [Railway Redis](https://railway.com/new/template/redis) | Free usage via Railway trial credits | Fast demos and MVPs | Very easy setup; credit-based (not permanent free tier) |
|
||||
| [Upstash](https://upstash.com/) | Free tier (serverless, request-based) | Side projects and low-traffic apps | Great DX; works well for serverless/edge |
|
||||
| [Aiven for Valkey](https://aiven.io/free-redis-database) | Free managed Valkey plan | Persistent dev/test environments | Redis-compatible Valkey; simple managed setup |
|
||||
|
||||
> **Note:** Active jobs and cached data will be lost during the switch. Schedule the change during low-activity periods.
|
||||
|
||||
## Production Checklist {#production-checklist}
|
||||
|
||||
- [ ] Redis 7.0+ installed
|
||||
- [ ] Password authentication enabled
|
||||
- [ ] TLS/SSL configured (for cloud deployments)
|
||||
- [ ] Firewall rules configured
|
||||
- [ ] Persistence enabled (AOF or RDB)
|
||||
- [ ] Memory limits set appropriately
|
||||
- [ ] Monitoring/alerting configured
|
||||
- [ ] Backup strategy in place
|
||||
- [ ] `REDIS_URL` added to environment variables
|
||||
- [ ] Connection tested and verified
|
||||
> [!NOTE]
|
||||
> Free limits and pricing change over time. Check each provider’s current limits before production use.
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
---
|
||||
title: Site Configuration
|
||||
description: Configure core site settings and understand where they affect runtime behavior
|
||||
---
|
||||
|
||||
Use **Manage → Site Configurations** to control identity, navigation, monitor sharing controls, retention, and event visibility.
|
||||
|
||||
## Quick setup {#quick-setup}
|
||||
|
||||
1. Open **Manage → Site Configurations**.
|
||||
2. Save **Site Information** (`siteName`, `siteURL`, logo, favicon).
|
||||
3. Configure **Navigation Menu**.
|
||||
4. Set **Monitor Sub Menu Options**.
|
||||
5. Configure **Data Retention Policy**.
|
||||
6. Configure **Event Display Settings**.
|
||||
|
||||
## Runtime impact map {#runtime-impact-map}
|
||||
|
||||
| Setting area | Stored key | Runtime impact |
|
||||
| ---------------------------- | ------------------------------------ | ------------------------------------------------------------------------ |
|
||||
| Site name / URL / logo / nav | `siteName`, `siteURL`, `logo`, `nav` | Rendered in top navbar branding and nav links |
|
||||
| Favicon | `favicon` | Used in `<head>` as page icon |
|
||||
| Monitor sub menu options | `subMenuOptions` | Gates monitor share actions (badges/embed) on public monitor pages |
|
||||
| Data retention policy | `dataRetentionPolicy` | Controls daily cleanup of old `monitoring_data` |
|
||||
| Event display settings | `eventDisplaySettings` | Filters incidents/maintenances returned for dashboard/home notifications |
|
||||
|
||||
## Monitor sub menu options {#monitor-sub-menu-options}
|
||||
|
||||
These site-level flags control share actions globally:
|
||||
|
||||
- `showShareBadgeMonitor`
|
||||
- `showShareEmbedMonitor`
|
||||
|
||||
> [!IMPORTANT]
|
||||
> These site-level toggles are combined with monitor-level sharing options. If site-level is disabled, monitor-level cannot force it on.
|
||||
|
||||
See [Sharing Monitors](/docs/v4/sharing).
|
||||
|
||||
## Data retention policy {#data-retention-policy}
|
||||
|
||||
`dataRetentionPolicy` drives the daily cleanup scheduler:
|
||||
|
||||
- `enabled`: turn cleanup on/off
|
||||
- `retentionDays`: how many days of monitor data to keep
|
||||
|
||||
When enabled, cleanup runs daily at midnight UTC.
|
||||
|
||||
## Event display settings {#event-display-settings}
|
||||
|
||||
`eventDisplaySettings` controls which events are visible:
|
||||
|
||||
- incidents: ongoing/resolved + resolved limits
|
||||
- maintenances: ongoing/past/upcoming + limits
|
||||
|
||||
This affects:
|
||||
|
||||
- event sections on status pages
|
||||
- notifications payload API used by the UI
|
||||
|
||||
## Verify changes {#verify-changes}
|
||||
|
||||
- Update site name/logo/nav and refresh home page.
|
||||
- Toggle monitor share options and verify Badge/Embed actions on a monitor page.
|
||||
- Change event display settings and verify incident/maintenance visibility.
|
||||
- Set retention policy and confirm scheduler logs in server output.
|
||||
@@ -0,0 +1,91 @@
|
||||
---
|
||||
title: Sharing Monitors
|
||||
description: Configure badge and embed sharing, including site-level and monitor-level controls
|
||||
---
|
||||
|
||||
Kener supports sharing monitor data through **badges** and **embeds**, plus a **live events** embed.
|
||||
|
||||
## What you can share {#what-you-can-share}
|
||||
|
||||
### Badges {#badges}
|
||||
|
||||
From **Manage → Badges**, you can generate:
|
||||
|
||||
- Status badge
|
||||
- Uptime badge
|
||||
- Latency badge (`average`, `maximum`, `minimum`)
|
||||
- Dot badges
|
||||
|
||||
You can customize style, label, colors, and time range.
|
||||
|
||||
### Embeds {#embeds}
|
||||
|
||||
From **Manage → Embed**, you can generate:
|
||||
|
||||
- **Status Bar** embed for a monitor
|
||||
- **Latency Chart** embed for a monitor
|
||||
- **Live Events** embed (incidents + maintenances)
|
||||
|
||||
You can copy output as `iframe` or `script`.
|
||||
|
||||
### Live Events widget {#live-events-widget}
|
||||
|
||||
Live Events embed supports:
|
||||
|
||||
- incidents on/off
|
||||
- maintenances on/off
|
||||
- optional monitor tag filtering
|
||||
|
||||
This lets you show one combined events feed or a scoped feed for selected monitors.
|
||||
|
||||
## Where controls are configured {#where-controls-are-configured}
|
||||
|
||||
### Site-level controls {#site-level-controls}
|
||||
|
||||
In **Manage → Site Configurations → Monitor Sub Menu Options**:
|
||||
|
||||
- `showShareBadgeMonitor`
|
||||
- `showShareEmbedMonitor`
|
||||
|
||||
These are global toggles for monitor sharing actions.
|
||||
|
||||
### Monitor-level controls {#monitor-level-controls}
|
||||
|
||||
In **Manage → Monitors → [monitor] → Sharing Options**:
|
||||
|
||||
- `showShareBadgeMonitor`
|
||||
- `showShareEmbedMonitor`
|
||||
|
||||
These are saved per monitor in `monitor_settings_json.sharing_options`.
|
||||
|
||||
## Precedence rules (important) {#precedence-rules}
|
||||
|
||||
> [!IMPORTANT]
|
||||
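> Site-level settings take precedence over monitor-level settings: a monitor can further restrict sharing, but it cannot enable an action that is disabled at site level.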
|
||||
|
||||
A sharing action is visible only when **both** are enabled:
|
||||
|
||||
- site-level toggle is enabled
|
||||
- monitor-level toggle is enabled
|
||||
|
||||
If site-level is disabled, monitor-level cannot re-enable it.
|
||||
|
||||
## How this appears on the public monitor page {#public-monitor-page-behavior}
|
||||
|
||||
On a monitor page, the share buttons in the top action bar are shown only when allowed by both levels:
|
||||
|
||||
- **Badges** menu requires site + monitor badge permission.
|
||||
- **Embed** menu requires site + monitor embed permission.
|
||||
|
||||
## Recommended setup flow {#recommended-setup-flow}
|
||||
|
||||
1. Enable global sharing defaults in **Site Configurations**.
|
||||
2. For sensitive monitors, disable badge/embed in monitor **Sharing Options**.
|
||||
3. Use **Manage → Badges** and **Manage → Embed** to generate final snippets.
|
||||
4. Verify visibility from a public monitor page.
|
||||
|
||||
## Related pages {#related-pages}
|
||||
|
||||
- [Monitors Overview](/docs/v4/monitors/overview)
|
||||
- [Pages](/docs/v4/pages)
|
||||
- [Configuration](/docs/v4/configuration)
|
||||
@@ -0,0 +1,100 @@
|
||||
---
|
||||
title: User Subscriptions
|
||||
description: Let users subscribe to incident and maintenance email updates
|
||||
---
|
||||
|
||||
Use subscriptions to let users receive email updates for **incidents** and **maintenances**.
|
||||
|
||||
## How subscriptions work {#how-subscriptions-work}
|
||||
|
||||
1. Admin enables subscriptions in dashboard settings.
|
||||
2. Email must be configured (SMTP/Resend) so verification and update emails can be sent.
|
||||
3. User subscribes from the public status page using email + OTP verification.
|
||||
4. User enables incidents and/or maintenances preferences.
|
||||
5. When events are triggered, Kener sends emails to active subscribers for that event type.
|
||||
|
||||
## Prerequisites {#prerequisites}
|
||||
|
||||
Before users can subscribe:
|
||||
|
||||
- Subscriptions must be enabled in **Manage → Subscriptions**.
|
||||
- At least one email subscription type must be enabled (`incidents` and/or `maintenances`).
|
||||
- Email setup must be valid.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The subscribe UI is shown only when subscriptions are enabled **and** email sending is available.
|
||||
|
||||
See [Email Setup](/docs/v4/setup/email-setup).
|
||||
|
||||
## Enable subscriptions (admin) {#enable-subscriptions-admin}
|
||||
|
||||
Go to **Manage → Subscriptions**:
|
||||
|
||||
1. Turn on **Enable Subscriptions**.
|
||||
2. Enable one or both:
|
||||
- **Incident Updates**
|
||||
- **Maintenance Updates**
|
||||
3. Save.
|
||||
|
||||
This writes the `subscriptionsSettings` site configuration used by the subscription API.
|
||||
|
||||
## How users subscribe (public flow) {#how-users-subscribe-public-flow}
|
||||
|
||||
From the status page, user clicks **Subscribe**:
|
||||
|
||||
1. Enter email.
|
||||
2. Kener sends a 6-digit verification code.
|
||||
3. User enters OTP to verify.
|
||||
4. Kener stores a subscriber session token in browser storage.
|
||||
5. User toggles preferences for incidents and/or maintenances.
|
||||
|
||||
If the token is invalid or expired, the user is asked to verify again.
|
||||
|
||||
## Add subscriber from backend (admin) {#add-subscriber-from-backend-admin}
|
||||
|
||||
You can add subscribers manually from **Manage → Subscriptions**:
|
||||
|
||||
1. Click **Add Subscriber**.
|
||||
2. Enter email.
|
||||
3. Select event types (incidents, maintenances).
|
||||
4. Save.
|
||||
|
||||
Admins can also:
|
||||
|
||||
- Toggle incident/maintenance subscription per subscriber
|
||||
- Delete subscribers
|
||||
|
||||
## When notifications are triggered {#when-notifications-are-triggered}
|
||||
|
||||
### Incident notifications {#incident-notifications}
|
||||
|
||||
Incident subscription emails are queued when alert-driven incident lifecycle changes happen, including:
|
||||
|
||||
- Incident created from alert trigger
|
||||
- Incident resolved from alert recovery
|
||||
|
||||
### Maintenance notifications {#maintenance-notifications}
|
||||
|
||||
Maintenance subscription emails are queued on maintenance event status transitions:
|
||||
|
||||
- `READY` (starting soon)
|
||||
- `ONGOING`
|
||||
- `COMPLETED`
|
||||
|
||||
## How emails are delivered {#how-emails-are-delivered}
|
||||
|
||||
When an event is queued:
|
||||
|
||||
1. Kener selects active subscribers for that `event_type`.
|
||||
2. Kener renders the `subscription_update` email template with event/site variables.
|
||||
3. Kener enqueues **one email per recipient** (privacy-safe fan-out).
|
||||
4. Email sender queue sends the final emails.
|
||||
|
||||
## Verify your setup {#verify-your-setup}
|
||||
|
||||
- Enable subscriptions in dashboard.
|
||||
- Confirm email setup works.
|
||||
- Subscribe with a test email from public status page.
|
||||
- Enable incident and/or maintenance preferences.
|
||||
- Trigger a test incident or wait for maintenance state transition.
|
||||
- Confirm email arrives.
|
||||
@@ -0,0 +1,111 @@
|
||||
---
|
||||
title: User Management
|
||||
description: Manage users, roles, invitations, and role permissions in Kener
|
||||
---
|
||||
|
||||
Use **Manage → Users** to invite teammates, control access, and manage account status.
|
||||
|
||||
## Roles overview {#roles-overview}
|
||||
|
||||
Kener uses three roles:
|
||||
|
||||
| Role | What it means |
|
||||
| -------- | ------------------------------------------------------------------------------------------------------------ |
|
||||
| `admin` | Full access, including user administration and vault/API-key level operations |
|
||||
| `editor` | Can run day-to-day operations (monitors, incidents, maintenances, site settings) but cannot administer users |
|
||||
| `member` | Limited access; cannot administer users or change system settings |
|
||||
|
||||
## What each role can do {#what-each-role-can-do}
|
||||
|
||||
### Admin {#admin}
|
||||
|
||||
Admin can:
|
||||
|
||||
- invite users
|
||||
- resend invitations
|
||||
- change user role (`member` / `editor`)
|
||||
- activate/deactivate users
|
||||
- send verification email to any user
|
||||
- perform all editor-level operational actions
|
||||
- manage admin-only areas like vault and certain privileged API actions
|
||||
|
||||
### Editor {#editor}
|
||||
|
||||
Editor can:
|
||||
|
||||
- invite users
|
||||
- resend invitation emails
|
||||
- manage monitors, incidents, maintenances, alerts, triggers, pages, subscriptions, and site data
|
||||
|
||||
Editor cannot:
|
||||
|
||||
- change user roles
|
||||
- activate/deactivate users
|
||||
- perform admin-only user administration actions
|
||||
|
||||
### Member {#member}
|
||||
|
||||
Member can:
|
||||
|
||||
- sign in and use allowed views
|
||||
- send verification email for their own account (if unverified)
|
||||
|
||||
Member cannot:
|
||||
|
||||
- invite users
|
||||
- resend invitations
|
||||
- change roles
|
||||
- activate/deactivate other users
|
||||
- perform admin/editor configuration actions
|
||||
|
||||
## Invite flow {#invite-flow}
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Email must be configured before invitation flow can be used.
|
||||
|
||||
From **Manage → Users**:
|
||||
|
||||
1. Click **Add User**.
|
||||
2. Enter name, email, and role (`editor` or `member`).
|
||||
3. Invitation email is sent with a secure token link.
|
||||
|
||||
Current behavior:
|
||||
|
||||
- invited user is created with inactive account and empty password
|
||||
- invitation token expires after 7 days
|
||||
|
||||
## How users accept invitation {#how-users-accept-invitation}
|
||||
|
||||
When user opens invitation link:
|
||||
|
||||
1. Token is validated (view + token + expiry).
|
||||
2. User sets password on invitation page.
|
||||
3. On success, account is activated and marked verified.
|
||||
4. User signs in normally.
|
||||
|
||||
If the link is invalid, expired, or already used, the invitation page shows an error and the user cannot activate the account from that link.
|
||||
|
||||
## Verification emails {#verification-emails}
|
||||
|
||||
- Admin/editor can send verification email to users.
|
||||
- Member can only trigger verification for their own account.
|
||||
|
||||
## Common user management tasks {#common-user-management-tasks}
|
||||
|
||||
- **Promote/demote user**: admin updates role in user settings sheet.
|
||||
- **Deactivate user**: admin toggles account inactive (session access removed).
|
||||
- **Re-invite user**: resend invitation if user has not set password yet.
|
||||
|
||||
## Requirements and dependencies {#requirements-and-dependencies}
|
||||
|
||||
- Email setup is required for:
|
||||
- inviting users
|
||||
- resending invitation emails
|
||||
- verification emails
|
||||
|
||||
See [Email Setup](/docs/v4/setup/email-setup).
|
||||
|
||||
## Related pages {#related-pages}
|
||||
|
||||
- [Site Configuration](/docs/v4/setup/site-configuration)
|
||||
- [User Subscriptions](/docs/v4/subscriptions)
|
||||
@@ -69,13 +69,124 @@ function normalizeSidebar(sidebar: DocsSidebarGroupSource[]): DocsSidebarGroup[]
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Derive a URL-friendly key from a tab's display name.
 *
 * The name is trimmed and lower-cased, every character other than
 * `a-z`, `0-9`, whitespace, or `-` is dropped, whitespace runs become a
 * single hyphen, repeated hyphens are collapsed, and one leading/trailing
 * hyphen is stripped.
 *
 * @param tabName - Human-readable tab name.
 * @returns The derived key; an empty string when nothing usable remains.
 */
function createTabKey(tabName: string): string {
  const lowered = tabName.trim().toLowerCase();
  // Keep only characters that may appear in a key (plus whitespace, handled next).
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, "");
  const hyphenated = allowedOnly.replace(/\s+/g, "-");
  const collapsed = hyphenated.replace(/-+/g, "-");
  // Hyphens are already collapsed, so at most one can remain on each end.
  return collapsed.replace(/^-|-$/g, "");
}
|
||||
|
||||
/**
 * Check whether a sidebar definition contains a page for the given slug.
 *
 * The slug is matched against every page slug in the normalized sidebar
 * (nested pages included). When `versionSlug` is supplied, the lookup also
 * accepts the slug with a `<versionSlug>/` prefix added and, if the slug
 * already starts with that prefix, with the prefix removed.
 *
 * @param sidebar - Raw sidebar groups to search.
 * @param pageSlug - Slug to look for; an empty slug never matches.
 * @param versionSlug - Optional version slug used for the prefixed/unprefixed variants.
 * @returns `true` if any variant of the slug is present in the sidebar.
 */
function sidebarHasSlug(sidebar: DocsSidebarGroupSource[], pageSlug: string, versionSlug?: string): boolean {
  if (!pageSlug) return false;

  const knownSlugs = new Set<string>();

  // Walk the page tree depth-first, recording every slug along the way.
  const collect = (pages: DocsPage[]): void => {
    pages.forEach((page) => {
      knownSlugs.add(page.slug);
      if (page.pages?.length) {
        collect(page.pages);
      }
    });
  };
  normalizeSidebar(sidebar).forEach((group) => collect(group.pages));

  // Candidate spellings of the slug, in the order they are tried.
  const candidates: string[] = [pageSlug];
  if (versionSlug) {
    const versionPrefix = `${versionSlug}/`;
    candidates.push(`${versionPrefix}${pageSlug}`);
    if (pageSlug.startsWith(versionPrefix)) {
      candidates.push(pageSlug.slice(versionPrefix.length));
    }
  }

  return candidates.some((candidate) => knownSlugs.has(candidate));
}
|
||||
|
||||
/**
 * Return the sidebar of the first navigation tab that has a non-empty sidebar.
 *
 * @param version - Docs version whose `content.navigation.tabs` are inspected.
 * @returns That tab's sidebar groups, or an empty array when no tab has one
 *   (including when the version defines no navigation tabs).
 */
function getPrimarySidebarSource(version: DocsVersion): DocsSidebarGroupSource[] {
  for (const tab of version.content.navigation?.tabs ?? []) {
    if (tab.sidebar && tab.sidebar.length > 0) {
      return tab.sidebar;
    }
  }
  return [];
}
|
||||
|
||||
/**
 * Decorate each navigation tab of a version with a `key` and a `firstPageSlug`.
 *
 * The key is derived from the tab name via `createTabKey`; when that yields an
 * empty string, the positional fallback `tab-<n>` (1-based) is used instead.
 * `firstPageSlug` is computed from the tab's normalized sidebar.
 *
 * @param version - Docs version whose `content.navigation.tabs` are processed.
 * @returns The tabs, each spread into a new object with `key` and `firstPageSlug` added.
 */
function normalizeNavigationTabs(version: DocsVersion) {
  const sourceTabs = version.content.navigation?.tabs ?? [];

  return sourceTabs.map((tab, position) => {
    const derivedKey = createTabKey(tab.name);
    const sidebarGroups = normalizeSidebar(tab.sidebar ?? []);

    return {
      ...tab,
      // `||` is intentional: an empty derived key must fall back to the positional key.
      key: derivedKey || `tab-${position + 1}`,
      firstPageSlug: getFirstPageSlugFromSidebar(sidebarGroups),
    };
  });
}
|
||||
|
||||
/**
 * Decide which navigation tab is active for a request.
 *
 * Resolution order:
 * 1. the tab whose key equals `requestedTabKey`, if it has a non-empty sidebar;
 * 2. the first tab with a non-empty sidebar that contains `requestedPageSlug`;
 * 3. the first tab with a non-empty sidebar, or failing that the first tab.
 *
 * @param version - Docs version providing the tabs.
 * @param requestedTabKey - Optional tab key asked for by the caller.
 * @param requestedPageSlug - Optional page slug used to infer the tab.
 * @returns The normalized tabs plus the chosen `activeTab` (`null` when the version has no tabs).
 */
function resolveActiveTab(version: DocsVersion, requestedTabKey?: string, requestedPageSlug?: string) {
  const tabs = normalizeNavigationTabs(version);
  const hasSidebar = (tab: (typeof tabs)[number]): boolean => (tab.sidebar?.length ?? 0) > 0;

  const fallbackTab = tabs.find(hasSidebar) ?? tabs[0];
  if (!fallbackTab) {
    return { tabs, activeTab: null };
  }

  // An explicit tab request wins, but only if that tab can actually render a sidebar.
  const tabByKey = requestedTabKey ? tabs.find((tab) => tab.key === requestedTabKey) : undefined;
  if (tabByKey && hasSidebar(tabByKey)) {
    return { tabs, activeTab: tabByKey };
  }

  // Otherwise infer the tab from the page being viewed.
  const tabByPage = requestedPageSlug
    ? tabs.find((tab) => hasSidebar(tab) && sidebarHasSlug(tab.sidebar ?? [], requestedPageSlug, version.slug))
    : undefined;

  return { tabs, activeTab: tabByPage ?? fallbackTab };
}
|
||||
|
||||
/**
 * Project each docs version into its `DocsVersionMeta` entry.
 *
 * `name`, `slug`, and `latest` are copied from the version, and
 * `firstPageSlug` is taken from the version's primary sidebar, i.e. the
 * sidebar of the first navigation tab that has one.
 *
 * @param versions - Versions to convert.
 * @returns One metadata object per input version, in the same order.
 */
function normalizeVersionMeta(versions: DocsVersion[]): DocsVersionMeta[] {
  return versions.map((version) => ({
    ...(version as DocsVersionMeta),
    name: version.name,
    slug: version.slug,
    latest: version.latest,
    // Single source of truth for the first page: the tab-based primary sidebar.
    // The older `version.content.sidebar` lookup was a duplicate key and is removed.
    firstPageSlug: getFirstPageSlugFromSidebar(normalizeSidebar(getPrimarySidebarSource(version))),
  }));
}
|
||||
|
||||
@@ -105,7 +216,11 @@ export function getDocsRootConfig(): DocsRootConfig {
|
||||
/**
|
||||
* Get the docs configuration for a selected version slug (or latest)
|
||||
*/
|
||||
export function getDocsConfig(requestedVersionSlug?: string): DocsConfig {
|
||||
export function getDocsConfig(
|
||||
requestedVersionSlug?: string,
|
||||
requestedTabKey?: string,
|
||||
requestedPageSlug?: string,
|
||||
): DocsConfig {
|
||||
const rootConfig = getDocsRootConfig();
|
||||
const versions = rootConfig.versions;
|
||||
const selectedVersion = getSelectedVersion(versions, requestedVersionSlug);
|
||||
@@ -118,19 +233,27 @@ export function getDocsConfig(requestedVersionSlug?: string): DocsConfig {
|
||||
sidebar: [],
|
||||
versions: [],
|
||||
activeVersion: null,
|
||||
activeTabKey: null,
|
||||
};
|
||||
}
|
||||
|
||||
const { tabs, activeTab } = resolveActiveTab(selectedVersion, requestedTabKey, requestedPageSlug);
|
||||
const activeSidebarSource = activeTab?.sidebar ?? [];
|
||||
|
||||
return {
|
||||
$schema: rootConfig.$schema,
|
||||
name: rootConfig.name,
|
||||
logo: rootConfig.logo,
|
||||
favicon: rootConfig.favicon,
|
||||
navigation: selectedVersion.content.navigation,
|
||||
sidebar: normalizeSidebar(selectedVersion.content.sidebar),
|
||||
navigation: {
|
||||
...selectedVersion.content.navigation,
|
||||
tabs,
|
||||
},
|
||||
sidebar: normalizeSidebar(activeSidebarSource),
|
||||
footerLinks: selectedVersion.content.footerLinks,
|
||||
versions: normalizeVersionMeta(versions),
|
||||
activeVersion: selectedVersion.slug,
|
||||
activeTabKey: activeTab?.key ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -221,6 +344,7 @@ export function getAllPages(config: DocsConfig = getDocsConfig()): DocsPage[] {
|
||||
for (const group of config.sidebar) {
|
||||
addPages(group.pages);
|
||||
}
|
||||
|
||||
return pages;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ export const GET: RequestHandler = async ({ params }) => {
|
||||
}
|
||||
|
||||
const requestedVersion = versionSlug;
|
||||
const config = getDocsConfig(requestedVersion);
|
||||
const config = getDocsConfig(requestedVersion, undefined, pageSlug);
|
||||
|
||||
const resolvedSlug = resolvePageSlugForConfig(pageSlug, config, requestedVersion);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user