Merge pull request #191 from fluent/in_tail_multiline

support multiline for In tail
This commit is contained in:
uu59 2015-04-23 17:07:37 +09:00
commit a4db49349d
14 changed files with 430 additions and 55 deletions

View File

@ -1,5 +1,6 @@
(function(){
"use strict";
var maxFormatCount = 20;
$(function(){
if($('#in_tail_format').length === 0) return;
@ -30,7 +31,20 @@
highlightedLines: null,
},
computed: {
useTextArea: function() {
return this.format === "multiline";
}
},
compiled: function(){
this.$watch('params.setting.formats', function(formats){
_.range(1, maxFormatCount + 1).forEach(function(i) {params.setting["format" + String(i)] = "";});
_.compact(formats.split("\n")).forEach(function(formatLine, index) {
params.setting["format" + String(index + 1)] = formatLine;
});
}),
this.$watch('params.setting.regexp', function(){
this.preview();
});
@ -46,6 +60,10 @@
if(!params.setting) {
params.setting = {};
}
var formats = _.chain(_.range(1, maxFormatCount + 1)).map(function(i) {return params.setting["format" + String(i)];}).compact().value();
params.setting.formats = formats.join("\n");
_.each(this.formatOptions, function(options){
_.each(options, function(key){
if(!params.setting.hasOwnProperty(key)){
@ -58,6 +76,12 @@
},
methods: {
onKeyup: function(ev){
var el = ev.target;
if(el.name.match(/\[format/)){
this.preview();
}
},
updateHighlightedLines: function() {
if(!this.regexpMatches) {
this.highlightedLines = null;
@ -132,6 +156,7 @@
regexp: self.params.setting.regexp,
time_format: self.params.setting.time_format,
format: _.isEmpty(self.format) ? "regexp" : self.format,
params: self.params.setting,
file: self.targetFile
}
}).done(resolve).fail(reject);

View File

@ -19,17 +19,9 @@ class ApiController < ApplicationController
end
def regexp_preview
preview = RegexpPreview.new(params[:file], params[:format], regexp: params[:regexp], time_format: params[:time_format])
matches = preview.matches
render json: {
params: {
setting: {
regexp: preview.regexp.try(:source),
time_format: preview.time_format,
}
},
matches: matches.compact,
}
preview = RegexpPreview.processor(params[:format]).new(params[:file], params[:format], params)
render json: preview.matches_json
end
def grok_to_regexp

View File

@ -1,6 +1,8 @@
class Fluentd
module Setting
class InTail
MULTI_LINE_MAX_FORMAT_COUNT = 20
include ActiveModel::Model
attr_accessor :path, :tag, :format, :regexp, :time_format, :rotate_wait, :pos_file, :read_from_head, :refresh_interval
@ -18,12 +20,13 @@ class Fluentd
:ltsv => [:delimiter, :time_key],
:json => [:time_key],
:regexp => [:time_format, :regexp],
:multiline => [:format_firstline] + (1..MULTI_LINE_MAX_FORMAT_COUNT).map{|n| "format#{n}".to_sym }
# TODO: Grok could generate Regexp including \d, \s, etc. fluentd config parser raise error with them for escape sequence check.
# TBD How to handle Grok/Regexp later, just comment out for hide
# :grok => [:grok_str],
}
end
attr_accessor *known_formats.values.flatten.compact
attr_accessor *known_formats.values.flatten.compact.uniq
def known_formats
self.class.known_formats
@ -59,9 +62,20 @@ class Fluentd
indent = " " * 2
format_specific_conf = ""
extra_format_options.each do |key|
format_specific_conf << "#{indent}#{key} #{send(key)}\n"
if format.to_sym == :multiline
known_formats[:multiline].each do |key|
value = send(key)
if value.present?
format_specific_conf << "#{indent}#{key} /#{value}/\n"
end
end
else
extra_format_options.each do |key|
format_specific_conf << "#{indent}#{key} #{send(key)}\n"
end
end
format_specific_conf
end

View File

@ -12,7 +12,7 @@
= f.text_field :path, class: "form-control", disabled: true
= render partial: "shared/vue/in_tail_format", locals: { file: f.object.path, formats: @setting.known_formats, initialSelected: f.object.format || @setting.guess_format }
%pre= file_tail(@setting.path).join("\n")
%pre= file_tail(@setting.path, Settings.in_tail_preview_line_count).join("\n")
%p
= f.submit t('terms.next'), class: "btn btn-lg btn-primary pull-right"

View File

@ -6,8 +6,21 @@
</select>
</div>
<div class="form-inline form-group" v-repeat="options">
<label for="in_tail_setting_{{ $value }}">{{ $value }} </label>
<input id="in_tail_setting_{{ $value }}" type="text" name="setting[{{ $value }}]" v-model="params.setting[$value]" size="100%" class="form-control" />
<label for="in_tail_setting_{{ $value }}" v-if="!useTextArea">{{ $value }} </label>
<input id="in_tail_setting_{{ $value }}" type="{{ useTextArea ? 'hidden' : 'text' }}" name="setting[{{ $value }}]" v-model="params.setting[$value]" v-on="keyup: onKeyup" size="100%" class="form-control" />
</div>
<div v-if="useTextArea">
<div class="form-inline form-group">
<label for="in_tail_setting_format_firstline">format_firstline</label>
<input id="in_tail_setting_format_firstline" type="text" name="setting[format_firstline]" v-model="params.setting['format_firstline']" v-on="keyup: onKeyup" size="100%" class="form-control" />
</div>
<div class="form-group">
<p class="alert alert-warning"><%= t("fluentd.settings.in_tail.notice_for_multiline_limit") %></p>
<label for="in_tail_setting_formats">formats</label>
<textarea id="in_tail_setting_formats" type="text" name="setting[formats]" v-model="params.setting['formats']" v-on="keyup: onKeyup" rows='20' size='100%' class="form-control"></textarea>
</div>
</div>
</script>

View File

@ -3,6 +3,7 @@ defaults: &defaults
default_log_tail_count: 30
histories_count_in_preview: 5
max_backup_files_count: 100
in_tail_preview_line_count: 40
recommended_plugins:
- category: filter
name: "rewrite-tag-filter"

View File

@ -211,6 +211,7 @@ en:
For each config parameter, please refer to the <a href="http://docs.fluentd.org/articles/in_tail" target="_blank">Tail input plugin</a> documentation page.
in_tail:
notice_for_permission: "Please check permission or group setting for %{user} user can read it."
notice_for_multiline_limit: "Please enter one regexp per line. Blank lines are ignored. Only the first 20 non-blank lines are used; any further lines are dropped."
restart_from_first: Restart from first
grok_manual: |
<p>

View File

@ -216,6 +216,7 @@ ja:
<a target="_blank" href="http://docs.fluentd.org/ja/articles/in_tail">in_tailプラグインの解説ページ</a>や
<a target="_blank" href="http://fluentular.herokuapp.com/">Fluentular</a>もご参照ください。
in_tail:
notice_for_multiline_limit: "改行区切りで正規表現を入力してください。空行はカウントされません。21行目以降の入力は無視されます。"
notice_for_permission: "※%{user}ユーザーが読み込み可能なようにパーミッションやグループの設定をご確認ください。"
restart_from_first: 最初からやり直す
grok_manual: |

View File

@ -2,47 +2,16 @@
require "fluent/registry"
require "fluent/configurable"
require "fluent/parser"
require "regexp_preview/single_line"
require "regexp_preview/multi_line"
class RegexpPreview
attr_reader :file, :format, :time_format, :regexp
def initialize(file, format, options = {})
@file = file
@format = format
module RegexpPreview
def self.processor(format)
case format
when "regexp"
@regexp = Regexp.new(options[:regexp])
@time_format = options[:time_format]
when "ltsv", "json", "csv", "tsv"
when "multiline"
RegexpPreview::MultiLine
else
definition = Fluent::TextParser::TEMPLATE_REGISTRY.lookup(format).call
raise "Unknown format '#{format}'" unless definition
definition.configure({}) # NOTE: SyslogParser define @regexp in configure method so call it to grab Regexp object
@regexp = definition.patterns["format"]
@time_format = definition.patterns["time_format"]
RegexpPreview::SingleLine
end
end
def matches
return [] unless @regexp # such as ltsv, json, etc
reader = FileReverseReader.new(File.open(file))
matches = reader.tail.map do |line|
result = {
:whole => line,
:matches => [],
}
m = line.match(regexp)
next result unless m
m.names.each_with_index do |name, index|
result[:matches] << {
key: name,
matched: m[name],
pos: m.offset(index + 1),
}
end
result
end
matches
end
end

View File

@ -0,0 +1,70 @@
module RegexpPreview
  # Builds the preview payload for the `multiline` format: the tail of a file
  # is scanned for chunks of consecutive lines in which line 1 matches
  # `format1`, line 2 matches `format2`, and so on.
  class MultiLine
    # Highest N for which a "formatN" parameter is consulted.
    MAX_FORMAT_COUNT = 20

    attr_reader :file, :format, :params

    # @param file [String] path of the file to preview
    # @param format [String] selected format name (stored, not interpreted here)
    # @param params [Hash] request parameters; only the nested hash under
    #   +:params+ is kept. It is read with the key +:format_firstline+ and the
    #   string keys "format1".."format20".
    def initialize(file, format, params = {})
      @file = file
      @format = format
      # Fall back to an empty hash so a request without nested settings yields
      # an empty preview instead of a NoMethodError on nil.
      @params = params[:params] || {}
    end

    # @return [Hash] +{ params: { setting: {...} }, matches: [...] }+.
    #   +regexp+ and +time_format+ are always nil for this format.
    def matches_json
      {
        params: {
          setting: { # for vue.js
            regexp: nil,
            time_format: nil,
          }
        },
        matches: matches.compact,
      }
    end

    private

    # Collects one entry per chunk: every line matching the first-line pattern
    # starts a candidate chunk of +patterns.length+ lines.
    #
    # @return [Array<Hash>] chunk descriptions as built by #detect_chunk
    def matches
      return [] if patterns.empty?

      target_lines = tail_lines
      result = []
      target_lines.each_with_index do |line, line_no|
        next unless line =~ firstline_pattern

        lines = target_lines[line_no, patterns.length]
        # Not enough lines left in the tail to apply every pattern.
        next if lines.length < patterns.length

        chunk = detect_chunk(lines)
        result << chunk if chunk
      end
      result
    end

    # Reads the lines to preview and re-appends the newline to each of them.
    # The block form of File.open guarantees the handle is closed.
    # NOTE(review): assumes FileReverseReader#tail returns the lines eagerly and
    # without their trailing newline -- confirm against FileReverseReader.
    def tail_lines
      File.open(file) do |io|
        FileReverseReader.new(io).tail(Settings.in_tail_preview_line_count).map { |line| "#{line}\n" }
      end
    end

    # Compiled once instead of on every line. A missing or blank
    # `format_firstline` compiles to an empty regexp, which matches every line
    # (the same result String#match gives for an empty string pattern).
    def firstline_pattern
      @firstline_pattern ||= Regexp.new(params[:format_firstline].to_s)
    end

    # Matches +lines[i]+ against +patterns[i]+ and records every named capture.
    #
    # @param lines [Array<String>] exactly +patterns.length+ consecutive lines
    # @return [Hash, nil] +{ whole:, matches: }+ where each +pos+ is relative to
    #   the start of +whole+; nil as soon as one line does not match
    def detect_chunk(lines)
      whole = ""
      matches = []
      lines.each_with_index do |line, i|
        match = line.match(patterns[i])
        return nil unless match

        match.names.each do |name|
          matches << {
            key: name,
            matched: match[name],
            # Looked up by name so the offset always belongs to the same group
            # as `matched`. A group that did not participate reports
            # [nil, nil]; leave those untouched instead of adding to nil.
            pos: match.offset(name).map { |pos| pos && pos + whole.length },
          }
        end
        whole << line
      end
      {
        whole: whole,
        matches: matches,
      }
    end

    # @return [Array<Regexp>] the non-blank "formatN" parameters, in order
    def patterns
      @patterns ||= (1..MAX_FORMAT_COUNT).map do |n|
        params["format#{n}"].presence
      end.compact.map { |pattern| Regexp.new(pattern) }
    end
  end
end

View File

@ -0,0 +1,65 @@
module RegexpPreview
  # Builds the preview payload for formats that are applied one line at a
  # time: a user-supplied regexp, a template format resolved through
  # Fluent::TextParser::TEMPLATE_REGISTRY, or a format with no regexp at all.
  class SingleLine
    attr_reader :file, :format, :params, :regexp, :time_format

    # @param file [String] path of the file to preview
    # @param format [String] "regexp", one of "ltsv"/"json"/"csv"/"tsv", or a
    #   name looked up in Fluent::TextParser::TEMPLATE_REGISTRY
    # @param params [Hash] request parameters; +:regexp+ is read when
    #   +format+ is "regexp"
    # @raise [RuntimeError] when the registry lookup yields no definition
    def initialize(file, format, params = {})
      @file = file
      @format = format
      @params = params
      # Every branch assigns both @regexp and @time_format, so neither needs a
      # default beforehand.
      case format
      when "regexp"
        @regexp = Regexp.new(params[:regexp])
        @time_format = nil
      when "ltsv", "json", "csv", "tsv"
        @regexp = nil
        @time_format = nil
      else # apache, nginx, etc
        definition = Fluent::TextParser::TEMPLATE_REGISTRY.lookup(format).call
        raise "Unknown format '#{format}'" unless definition
        definition.configure({}) # NOTE: SyslogParser define @regexp in configure method so call it to grab Regexp object
        @regexp = definition.patterns["format"]
        @time_format = definition.patterns["time_format"]
      end
    end

    # @return [Hash] +{ params: { setting: { regexp:, time_format: } }, matches: [...] }+
    def matches_json
      {
        params: {
          setting: {
            # NOTE: regexp and time_format are used when format == 'apache' || 'nginx' || etc.
            regexp: regexp && regexp.source,
            time_format: time_format,
          }
        },
        matches: matches.compact,
      }
    end

    private

    # @return [Array<Hash>] one entry per previewed line; empty when the format
    #   has no regexp (such as ltsv, json, etc)
    def matches
      return [] unless regexp

      tail_lines.map { |line| match_line(line) }
    end

    # Reads the lines to preview. The block form of File.open guarantees the
    # handle is closed once the lines have been read.
    # NOTE(review): assumes FileReverseReader#tail returns the lines eagerly --
    # confirm against FileReverseReader.
    def tail_lines
      File.open(file) do |io|
        FileReverseReader.new(io).tail(Settings.in_tail_preview_line_count)
      end
    end

    # Describes how a single line matched the regexp.
    #
    # @param line [String]
    # @return [Hash] +{ whole:, matches: }+; +matches+ stays empty when the line
    #   does not match, otherwise it holds one entry per named capture
    def match_line(line)
      result = {
        :whole => line,
        :matches => [],
      }
      match = line.match(regexp)
      return result unless match

      match.names.each do |name|
        result[:matches] << {
          key: name,
          matched: match[name],
          # Looked up by name so the offset always belongs to the same group
          # as `matched`; a group that did not participate reports [nil, nil].
          pos: match.offset(name),
        }
      end
      result
    end
  end
end

View File

@ -0,0 +1,40 @@
require 'spec_helper'
describe RegexpPreview::MultiLine do
  describe "#matches_json" do
    let(:log_path) { File.expand_path("./spec/support/fixtures/error0.log", Rails.root) }

    # Settings as MultiLine receives them: two real patterns followed by blank
    # entries for every remaining formatN slot, nested under :params.
    let(:params) do
      settings = {
        format_firstline: ".+",
        time_format: "time_format",
        "format1" => "(?<foo>foo)",
        "format2" => "(?<bar>bar)",
      }
      (3..Fluentd::Setting::InTail::MULTI_LINE_MAX_FORMAT_COUNT).each do |n|
        settings["format#{n}"] = ""
      end
      { params: settings }
    end

    subject { RegexpPreview::MultiLine.new(log_path, "multiline", params).matches_json }

    it 'should not have regexp and time_format in [:params][:setting]' do
      expect(subject[:params][:setting]).to eq({ regexp: nil, time_format: nil })
    end

    it "should include matches info" do
      # The second capture's offsets are shifted by the length of "foo\n".
      foo_capture = { key: "foo", matched: "foo", pos: [0, 3] }
      bar_capture = { key: "bar", matched: "bar", pos: [4, 7] }
      expected_chunk = {
        whole: "foo\nbar\n",
        matches: [foo_capture, bar_capture],
      }

      expect(subject[:matches]).to include expected_chunk
    end
  end
end

View File

@ -0,0 +1,183 @@
require 'spec_helper'
describe RegexpPreview::SingleLine do
  # Construction only: the file is never opened here, so the path need not exist.
  describe ".initialize" do
    subject { RegexpPreview::SingleLine.new("log_file.log", format, params) }

    describe "format" do
      let :params do
        {
          # Single-quoted on purpose: inside double quotes Ruby drops the
          # backslashes of "\[" and "\]", which turns the pattern into the
          # character class [.+] instead of a literal bracket pair.
          regexp: '(?<category>\[.+\])',
          time_format: "%y/%m/%d",
        }
      end

      # Expects the including context to define `regexp` and `time_format`.
      shared_examples "should set regexp and time_format from selected format" do
        it do
          expect(subject.regexp).to eq regexp
          expect(subject.time_format).to eq time_format
          expect(subject.params).to eq params
        end
      end

      # Formats without a regexp: both derived values must be nil.
      shared_examples "should set params only" do
        include_examples "should set regexp and time_format from selected format" do
          let(:regexp) { nil }
          let(:time_format) { nil }
        end
      end

      context "regexp" do
        let(:format) { "regexp" }

        it 'should set regexp from params' do
          expect(subject.regexp).to eq /#{params[:regexp]}/
          expect(subject.time_format).to be_nil
          expect(subject.params).to eq params
        end
      end

      context "ltsv" do
        let(:format) { "ltsv" }

        include_examples "should set params only"
      end

      context "json" do
        let(:format) { "json" }

        include_examples "should set params only"
      end

      context "csv" do
        let(:format) { "csv" }

        include_examples "should set params only"
      end

      context "tsv" do
        let(:format) { "tsv" }

        include_examples "should set params only"
      end

      context "syslog" do # "apache", "nginx", etc
        let(:format) { "syslog" }

        include_examples "should set regexp and time_format from selected format" do
          let(:regexp) do
            /^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
          end
          let(:time_format) { "%b %d %H:%M:%S" }
        end
      end

      context "apache" do
        let(:format) { "apache" }

        include_examples "should set regexp and time_format from selected format" do
          let(:regexp) do
            /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
          end
          let(:time_format) { "%d/%b/%Y:%H:%M:%S %z" }
        end
      end

      context "nginx" do
        let(:format) { "nginx" }

        include_examples "should set regexp and time_format from selected format" do
          let(:regexp) do
            /^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
          end
          let(:time_format) { "%d/%b/%Y:%H:%M:%S %z" }
        end
      end
    end
  end

  # End-to-end payload checks against fixture log files.
  describe "#matches_json" do
    let(:logfile) { File.expand_path(logfile_path, Rails.root) }
    let :params do
      {
        regexp: "(?<regexp>bar)", # bar from error0.log
        time_format: "time_format",
      }
    end

    subject { RegexpPreview::SingleLine.new(logfile, format, params).matches_json }

    describe "format" do
      context "regexp" do
        let(:format) { "regexp" }
        let(:logfile_path) { "./spec/support/fixtures/error0.log" }

        it 'should have regexp only in [:params][:setting]' do
          setting_json = {
            regexp: params[:regexp],
            time_format: nil
          }
          expect(subject[:params][:setting]).to eq setting_json
        end

        it 'should include matches info' do
          matches_info = {
            whole: "bar",
            matches: [
              { key: "regexp", matched: "bar", pos: [0, 3] }
            ]
          }
          expect(subject[:matches]).to include matches_info
        end
      end

      context "csv" do
        let(:format) { "csv" }
        let(:logfile_path) { "./spec/support/fixtures/error0.log" }

        it 'should not have regexp and time_format in [:params][:setting]' do
          setting_json = {
            regexp: nil,
            time_format: nil
          }
          expect(subject[:params][:setting]).to eq setting_json
        end

        it 'should not have matches_info' do
          expect(subject[:matches]).to be_empty
        end
      end

      context "syslog" do
        let(:format) { "syslog" }
        let(:logfile_path) { "./spec/support/fixtures/error4.log" }

        it 'should set regexp and time_format from syslog format' do
          setting_json = {
            regexp: "^(?<time>[^ ]*\\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_/\\.\\-]*)(?:\\[(?<pid>[0-9]+)\\])?(?:[^\\:]*\\:)? *(?<message>.*)$",
            time_format: "%b %d %H:%M:%S",
          }
          expect(subject[:params][:setting]).to eq setting_json
        end

        it 'should include matches info' do
          # The optional pid group does not participate, hence the nil offsets.
          matches_info = {
            whole: "2014-05-27 10:54:37 +0900 [info]: listening fluent socket on 0.0.0.0:24224",
            matches: [
              { key: "time", matched: "2014-05-27 10:54:37 +0900", pos: [0, 25] },
              { key: "host", matched: "[info]:", pos: [26, 33] },
              { key: "ident", matched: "listening", pos: [34, 43] },
              { key: "pid", matched: nil, pos: [nil, nil] },
              { key: "message", matched: "24224", pos: [69, 74] }
            ]
          }
          expect(subject[:matches]).to include matches_info
        end
      end
    end
  end
end

View File

@ -0,0 +1 @@
2014-05-27 10:54:37 +0900 [info]: listening fluent socket on 0.0.0.0:24224