Details the use of search tools in the Hibernate framework of Java

  • 2020-04-01 04:27:54
  • OfStack

Hibernate provides full-text indexing, which is very convenient. Here is a quick look at how to use it.
1. Add the package dependencies to pom.xml


<!-- Hibernate Search integration for Hibernate ORM.
     ${hibernate-search.version} is a Maven property placeholder and has to be defined in the POM. -->
<dependency>
      <groupId>org.hibernate</groupId>
      <artifactId>hibernate-search-orm</artifactId>
      <version>${hibernate-search.version}</version>
    </dependency>

    <!-- Lucene add-ons; all three share the ${lucene.version} property placeholder. -->
    <!-- Chinese analyzer module (SmartChineseAnalyzer, used in the Chinese lexical analysis section). -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-smartcn</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <!-- NOTE(review): presumably supplies the PhoneticFilterFactory used by the analyzer definition; confirm. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-phonetic</artifactId>
      <version>${lucene.version}</version>
    </dependency>

Configure the directory in which Hibernate Search saves its index


 <!-- Session factory with the second-level/query cache enabled and the Hibernate Search index directory configured. -->
 <bean id="sessionFactory"
    class="org.springframework.orm.hibernate4.LocalSessionFactoryBean"
    destroy-method="destroy">
    <property name="dataSource" ref="poolingDataSource" />
    <property name="configLocation">
      <value> classpath:hibernate.cfg.xml </value>
    </property>
    <property name="hibernateProperties">
      <props>
        <prop key="hibernate.dialect">${hibernate.dialect}</prop>
        <!-- Booleans can be easily used in expressions by
           declaring HQL query substitutions in Hibernate configuration -->
        <prop key="hibernate.query.substitutions">true 'Y', false 'N'</prop>
        <!-- http://ehcache.org/documentation/integrations/hibernate -->
        <!-- http://www.tutorialspoint.com/hibernate/hibernate_caching.htm -->
        <prop key="hibernate.cache.use_second_level_cache">true</prop>
        <!-- org.hibernate.cache.ehcache.EhCacheRegionFactory -->
        <prop key="hibernate.cache.region.factory_class">org.hibernate.cache.ehcache.EhCacheRegionFactory</prop>
        <!-- The Hibernate cache is only used by load() when fetching a single persistent object.
             To have the result sets obtained through findAll(), list(), iterator(),
             createCriteria() or createQuery() cached as well, hibernate.cache.use_query_cache
             has to be set to true. -->
        <prop key="hibernate.cache.use_query_cache">true</prop>
        <prop key="net.sf.ehcache.configurationResourceName">ehcache-hibernate.xml</prop>
        <!-- Hibernate Search index directory -->
        <prop key="hibernate.search.default.indexBase">indexes/</prop>
      </props>
    </property>
  </bean> 

Add the @Indexed annotation to each class that needs to be searchable, then add the @Field annotation to the fields of that class that should be searchable. In general, enum fields do not need an analyzer for lexical analysis, while other fields do. Fields that are not needed for projection (returning only some of the fields) do not need their actual data stored in the index. @AnalyzerDef can be used to define different lexical analyzers together with specific token filters.


/**
 * Searchable entity. {@code @Indexed} marks the class for indexing and
 * {@code @AnalyzerDef} declares the named analyzer "enTopicAnalyzer" that the
 * text fields below refer to.
 */
@Indexed
@AnalyzerDef(
    name="enTopicAnalyzer",
    // Character filter applied before tokenizing (HTML stripping).
    charFilters={
      @CharFilterDef(factory=HTMLStripCharFilterFactory.class)
    },
    tokenizer=@TokenizerDef(factory=StandardTokenizerFactory.class),
    // Token filters, listed in the order they are declared: standard filter,
    // stop filter, phonetic filter (DoubleMetaphone encoder) and
    // Snowball stemmer (English).
    filters={
      @TokenFilterDef(factory=StandardFilterFactory.class),
      @TokenFilterDef(factory=StopFilterFactory.class),
      @TokenFilterDef(factory=PhoneticFilterFactory.class,
        params = {
          @Parameter(name="encoder", value="DoubleMetaphone")
        }),
      @TokenFilterDef(factory=SnowballPorterFilterFactory.class,
        params = {
          @Parameter(name="language", value="English")
        })
      }
  )
public class Topic {

  ......
  // Indexed and analyzed with "enTopicAnalyzer"; Store.NO keeps the original value out of the index.
  @Field(index=Index.YES, analyze=Analyze.YES, store=Store.NO)
  @Analyzer(definition = "enTopicAnalyzer")
  private String title;
  ......
  // Same mapping as title: analyzed with "enTopicAnalyzer", value not stored.
  @Field(index=Index.YES, analyze=Analyze.YES, store=Store.NO)
  @Analyzer(definition = "enTopicAnalyzer")
  private String content;
  ......  
  // Enum field: indexed without analysis (Analyze.NO) and converted through EnumBridge.
  @Enumerated(EnumType.STRING)
  @Field(index=Index.YES, analyze=Analyze.NO, store=Store.NO, bridge=@FieldBridge(impl=EnumBridge.class))
  private TopicStatus status;
  ...
  }

Build the index for already existing data in code


ApplicationContext context = new ClassPathXmlApplicationContext("spring-resources.xml");

    SessionFactory sessionFactory = (SessionFactory) context.getBean("sessionFactory");

    Session sess = sessionFactory.openSession();
    FullTextSession fullTextSession = Search.getFullTextSession(sess);
    try {
      fullTextSession.createIndexer().startAndWait();
    } catch (InterruptedException e) {
       LOG.error(e.getMessage(), e);
    } finally {
      fullTextSession.close();
    }
    ((AbstractApplicationContext)context).close();

Obtain a FullTextSession, build the query, and fetch the results that match the query condition


 // Full-text view of the current Hibernate session.
    FullTextSession fullTextSession = Search
        .getFullTextSession(getSession());

    // Query DSL bound to the indexed Topic entity.
    QueryBuilder queryBuilder = fullTextSession.getSearchFactory()
        .buildQueryBuilder().forEntity(Topic.class).get();

    // Keyword query over the two analyzed text fields.
    org.apache.lucene.search.Query luceneQuery = queryBuilder.keyword()// .wildcard()
          .onFields("title", "content").matching(query.getKeyword())
          // .matching("*" + query.getKeyword() + "*")
          .createQuery();

    // Wrap the Lucene query so that matching Topic entities are returned.
    FullTextQuery hibernateQuery = fullTextSession.createFullTextQuery(
        luceneQuery, Topic.class);
    return hibernateQuery.list();

Note:
1. During testing, the entity was modified and a new indexed field was added, but rebuilding the index was forgotten. The unit tests passed, yet the production environment returned wrong results.
2. The search is not very powerful: for example, when searching for "test", results that contain "test" may not be found.

Chinese lexical analysis

Hibernate Search uses Lucene under the hood, so every Chinese word segmenter that works with Lucene can also be used with Hibernate Search to support Chinese lexical analysis. Commonly used analyzers include paoding, IKAnalyzer, mmseg4j, and so on. For details, refer to recent comparisons of word-segmentation analyzers. The default analyzer of Hibernate Search is org.apache.lucene.analysis.standard.StandardAnalyzer, which splits Chinese text character by character and therefore obviously does not meet our requirements.
Here is how to configure Chinese word segmentation in Hibernate, using the Chinese analyzer that ships with Lucene. There are three ways to do this: the first is to set the analyzer in Hibernate's configuration file, the second is to define the analyzer on each class that needs to be searched, and the third is to configure it on a single field. The first two configurations are shown here.
Hibernate configuration:


<!-- Analyzer set in Hibernate's configuration file: Lucene's SmartChineseAnalyzer. -->
<property name="hibernate.search.analyzer"> org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer</property>

Configuring Chinese word segmentation on the searched class:


// Class-level configuration: SmartChineseAnalyzer is declared on the indexed class itself.
@Indexed
@Analyzer(impl=SmartChineseAnalyzer.class)

You also need to add the corresponding package dependency in Maven


<!-- Dependency required for SmartChineseAnalyzer; ${lucene.version} is a Maven property placeholder. -->
<dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-smartcn</artifactId>
      <version>${lucene.version}</version>
    </dependency>

Multi-condition queries
Hibernate Search supports multi-condition queries by combining several conditions; here is a brief introduction to one way of doing this in practice.
If the query has only a single condition, it can be written very simply:

luceneQuery = queryBuilder.keyword().onFields("title", "content").matching(query.getKeyword()).createQuery();

If the conditions have to be combined with AND, a must junction is needed; if they are combined with OR, a should junction is needed. Here is an example that uses a must junction:


// Required clause: the keyword has to match "title" or "content".
MustJunction term = queryBuilder.bool().must(
    queryBuilder.keyword()
        .onFields("title", "content")
        .matching(query.getKeyword())
        .createQuery());

// Negated clause: not() turns the must() just added into "status must NOT match".
term.must(
    queryBuilder.keyword()
        .onField("status")
        .matching(query.getExcludeStatus())
        .createQuery())
    .not();

Full example:


/**
 * Builds the full-text query for a topic search.
 *
 * <p>The keyword is always matched against the "title" and "content" fields.
 * When the query object also carries a status, an excluded status or a user
 * name, each of them is added to a boolean junction as a further required
 * clause; the excluded status is negated with {@code not()}.
 *
 * @param query search criteria; the keyword is always read, the other values only when non-null
 * @return the Hibernate Search query targeting {@code Topic}, not yet executed
 */
private FullTextQuery findByKeywordQuery(TopicQuery query) {
  final FullTextSession searchSession = Search.getFullTextSession(getSession());
  final QueryBuilder dsl = searchSession.getSearchFactory()
      .buildQueryBuilder()
      .forEntity(Topic.class)
      .get();

  final boolean hasFilters = query.getStatus() != null
      || query.getUsername() != null
      || query.getExcludeStatus() != null;

  if (!hasFilters) {
    // Keyword only: a single keyword query is enough.
    final org.apache.lucene.search.Query keywordOnly = dsl.keyword()
        .onFields("title", "content")
        .matching(query.getKeyword())
        .createQuery();
    if (LOG.isDebugEnabled()) {
      LOG.debug("create clean keyword search query: " + keywordOnly.toString());
    }
    return searchSession.createFullTextQuery(keywordOnly, Topic.class);
  }

  // Keyword plus filters: every condition becomes a clause of one boolean junction.
  final MustJunction junction = dsl.bool().must(
      dsl.keyword()
          .onFields("title", "content")
          .matching(query.getKeyword())
          .createQuery());

  if (query.getStatus() != null) {
    junction.must(
        dsl.keyword()
            .onField("status")
            .matching(query.getStatus())
            .createQuery());
  }

  if (query.getExcludeStatus() != null) {
    // not() negates the clause that was just added.
    junction.must(
        dsl.keyword()
            .onField("status")
            .matching(query.getExcludeStatus())
            .createQuery())
        .not();
  }

  if (query.getUsername() != null) {
    junction.must(
        dsl.keyword()
            .onField("owner.username")
            .ignoreFieldBridge()
            .matching(query.getUsername())
            .createQuery());
  }

  final org.apache.lucene.search.Query combined = junction.createQuery();
  if (LOG.isDebugEnabled()) {
    LOG.debug("create complicated keyword search query: " + combined.toString());
  }
  return searchSession.createFullTextQuery(combined, Topic.class);
}


Related articles: