Changeset 2561
- Timestamp:
- 03/02/10 00:08:44 (2 years ago)
- Location:
- Search-Query-Dialect-KSx/trunk
- Files:
-
- 3 edited
-
lib/Search/Query/Dialect/KSx.pm (modified) (5 diffs)
-
lib/Search/Query/Field/KSx.pm (modified) (3 diffs)
-
t/01-parser.t (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
Search-Query-Dialect-KSx/trunk/lib/Search/Query/Dialect/KSx.pm
r2556 r2561 5 5 use Carp; 6 6 use Data::Dump qw( dump ); 7 use Scalar::Util qw( blessed ); 7 8 use Search::Query::Field::KSx; 8 9 use KinoSearch::Search::ANDQuery; … … 122 123 123 124 if ( $self->fuzzify ) { 124 $value .= '*' unless $value =~ m/[\*\%]/; 125 } 126 127 # normalize wildcard 128 my $wildcard = $self->wildcard; 129 $value =~ s/[\*\%]/$wildcard/g; 125 $value .= '*' unless $value =~ m/[\*]/; 126 } 130 127 131 128 return $value; … … 350 347 351 348 my $quote = $clause->quote || ''; 349 my $is_phrase = $quote eq '"' ? 1 : 0; 352 350 353 351 my @buf; 354 NAME: for my $name (@fields) {352 FIELD: for my $name (@fields) { 355 353 my $field = $self->_get_field($name); 356 354 357 355 if ( defined $field->callback ) { 358 356 push( @buf, $field->callback->( $field, $op, $value ) ); 359 next NAME;357 next FIELD; 360 358 } 361 359 362 360 #warn dump [ $name, $op, $quote, $value ]; 363 361 364 # invert fuzzy 365 if ( $op eq '!~' || ( $op eq '!:' and $value =~ m/[$wildcard\*\?]/ ) ) 366 { 367 $value .= $wildcard unless $value =~ m/\Q$wildcard/; 368 369 push( 370 @buf, 371 Search::Query::Dialect::KSx::NOTWildcardQuery->new( 372 field => $name, 373 term => $value, 374 ) 375 ); 376 } 377 378 # fuzzy 379 elsif ( $op eq '~' 380 || ( $op eq ':' and $value =~ m/[$wildcard\*\?]/ ) ) 381 { 382 $value .= $wildcard unless $value =~ m/\Q$wildcard/; 383 384 push( 385 @buf, 386 Search::Query::Dialect::KSx::WildcardQuery->new( 387 field => $name, 388 term => $value, 389 ) 390 ); 391 } 392 393 # invert 394 elsif ( $op eq '!:' ) { 395 push( 396 @buf, 397 KinoSearch::Search::NOTQuery->new( 398 field => $name, 399 term => $value, 400 ) 401 ); 402 } 403 404 # range 405 elsif ( $op eq '..' ) { 362 # range is un-analyzed 363 if ( $op eq '..' ) { 406 364 if ( ref $value ne 'ARRAY' or @$value != 2 ) { 407 365 croak "range of values must be a 2-element ARRAY"; … … 417 375 418 376 push( @buf, $range_query ); 377 next FIELD; 419 378 420 379 } … … 427 386 428 387 croak "NOT Range query not yet supported"; 429 } 430 431 # standard 432 else { 388 next FIELD; # haha. never get here. 389 } 390 391 $self->debug and warn "value before:$value"; 392 my @values = ($value); 393 394 # if the field has an analyzer, use it on $value 395 if ( blessed( $field->analyzer ) && !ref $value ) { 396 397 # preserve any wildcards 398 if ( $value =~ m/[$wildcard\*\?]/ ) { 399 400 # assume CaseFolder 401 $value = lc($value); 402 403 # split on whitespace, not token regex 404 my @tok = split( m/\s+/, $value ); 405 406 # if stemmer, apply only to prefix if at all. 407 my $stemmer; 408 if ($field->analyzer->isa( 409 'KinoSearch::Analysis::PolyAnalyzer') 410 ) 411 { 412 413 # KS currently broken with no get_analyzers() method. 414 # my $analyzers = $field->analyzer->get_analyzers(); 415 # for my $ana (@$analyzers) { 416 # if ( $ana->isa('KinoSearch::Analysis::Stemmer') 417 # or $ana->can('stem') ) 418 # { 419 # $stemmer = $ana; 420 # last; 421 # } 422 # } 423 } 424 elsif ($field->analyzer->isa('KinoSearch::Analysis::Stemmer') 425 or $field->analyzer->can('stem') ) 426 { 427 $stemmer = $field->analyzer; 428 } 429 430 if ($stemmer) { 431 carp "found stemmer"; 432 for my $tok (@tok) { 433 if ( $tok =~ m/^\w\*$/ ) { 434 $tok = $stemmer->stem($tok); 435 } 436 } 437 } 438 439 } 440 else { 441 @values = grep { defined and length } 442 @{ $field->analyzer->split($value) }; 443 } 444 } 445 446 $self->debug and warn "value after :" . dump( \@values ); 447 448 if ( $is_phrase or @values > 1 ) { 433 449 push( 434 450 @buf, 435 KinoSearch::Search:: TermQuery->new(451 KinoSearch::Search::PhraseQuery->new( 436 452 field => $name, 437 term => $value,453 terms => \@values, 438 454 ) 439 455 ); 440 456 } 457 else { 458 my $term = $values[0]; 459 460 # invert fuzzy 461 if ( $op eq '!~' 462 || ( $op eq '!:' and $term =~ m/[$wildcard\*\?]/ ) ) 463 { 464 $term .= $wildcard unless $term =~ m/\Q$wildcard/; 465 466 push( 467 @buf, 468 Search::Query::Dialect::KSx::NOTWildcardQuery->new( 469 field => $name, 470 term => $term, 471 ) 472 ); 473 } 474 475 # fuzzy 476 elsif ( $op eq '~' 477 || ( $op eq ':' and $term =~ m/[$wildcard\*\?]/ ) ) 478 { 479 $term .= $wildcard unless $term =~ m/\Q$wildcard/; 480 481 push( 482 @buf, 483 Search::Query::Dialect::KSx::WildcardQuery->new( 484 field => $name, 485 term => $term, 486 ) 487 ); 488 } 489 490 # invert 491 elsif ( $op eq '!:' ) { 492 push( 493 @buf, 494 KinoSearch::Search::NOTQuery->new( 495 field => $name, 496 term => $term, 497 ) 498 ); 499 } 500 501 # standard 502 else { 503 push( 504 @buf, 505 KinoSearch::Search::TermQuery->new( 506 field => $name, 507 term => $term, 508 ) 509 ); 510 } 511 512 } # TERM 441 513 } 442 514 if ( @buf == 1 ) { -
Search-Query-Dialect-KSx/trunk/lib/Search/Query/Field/KSx.pm
r2552 r2561 3 3 use warnings; 4 4 use base qw( Search::Query::Field ); 5 use Scalar::Util qw( blessed ); 5 6 6 __PACKAGE__->mk_accessors(qw( type is_int ));7 __PACKAGE__->mk_accessors(qw( type is_int analyzer )); 7 8 8 9 our $VERSION = '0.01'; … … 37 38 =item type 38 39 39 The column type.a 40 The column type. This may be a KinoSearch::FieldType object 41 or a simple string. 40 42 41 43 =item is_int 42 44 43 45 Set if C<type> matches m/int|num|date/. 46 47 =item analyzer 48 49 Set to a KinoSearch::Analysis::Analyzer-based object (optional). 44 50 45 51 =back … … 54 60 55 61 # numeric types 56 if ( $self->{type} =~ m/int|date|num/ ) {62 if ( !blessed( $self->{type} ) && $self->{type} =~ m/int|date|num/ ) { 57 63 $self->{is_int} = 1; 58 64 } -
Search-Query-Dialect-KSx/trunk/t/01-parser.t
r2556 r2561 3 3 use strict; 4 4 use warnings; 5 use Test::More tests => 5 3;5 use Test::More tests => 54; 6 6 use Data::Dump qw( dump ); 7 7 8 use KinoSearch::Analysis::PolyAnalyzer; 9 my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en', ); 10 8 11 use_ok('Search::Query::Parser'); 9 12 10 13 ok( my $parser = Search::Query::Parser->new( 11 fields => [qw( foo color name )], 14 fields => { 15 foo => { analyzer => $analyzer }, 16 color => { analyzer => $analyzer }, 17 name => { analyzer => $analyzer }, 18 }, 12 19 default_field => 'name', 13 20 dialect => 'KSx', … … 19 26 #dump $parser; 20 27 21 ok( my $query1 = $parser->parse('foo= bar'), "query1" );22 23 is( $query1, qq/foo: bar/, "query1 string" );28 ok( my $query1 = $parser->parse('foo=BAR'), "query1" ); 29 30 is( $query1, qq/foo:BAR/, "query1 string" ); 24 31 25 32 ok( my $ks_query1 = $query1->as_ks_query(), "as_ks_query" ); 26 33 ok( $ks_query1->isa('KinoSearch::Search::TermQuery'), 27 34 "ks_query isa TermQuery" ); 28 29 ok( my $query2 = $parser->parse('foo:bar'), "query2" ); 30 31 is( $query2, qq/foo:bar/, "query2 string" ); 32 33 ok( my $query3 = $parser->parse('foo bar'), "query3" ); 34 35 is( $query3, qq/name:foo AND name:bar/, "query3 string" ); 35 is( $ks_query1->to_string, "foo:bar", "KS query analyzer applied" ); 36 37 ok( my $query2 = $parser->parse('foo:BaR'), "query2" ); 38 39 is( $query2, qq/foo:BaR/, "query2 string" ); 40 41 ok( my $query3 = $parser->parse('FoO bar'), "query3" ); 42 43 is( $query3, qq/name:FoO AND name:bar/, "query3 string" ); 36 44 37 45 my $str = '-color:red (name:john OR foo:bar)';
Note: See TracChangeset
for help on using the changeset viewer.