Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.

Significant Terms Aggregation

More info about histogram aggregation is in the official elasticsearch docs

An aggregation that returns interesting or unusual occurrences of terms in a set.

Simple example

{
    "query" : {
        "terms" : {"force" : [ "British Transport Police" ]}
    },
    "aggregations" : {
        "significantCrimeTypes" : {
            "significant_terms" : { "field" : "crime_type" }
        }
    }
}

And now the query via DSL:

$significantTermsAggregation = new SignificantTermsAggregation('significantCrimeTypes', 'crime_type');
$query = new TermsQuery('force', ['British Transport Police']);

$search = new Search();
$search->addQuery($query);
$search->addAggregation($histogramAggregation);

$queryArray = $search->toArray();

Multi-set Analysis

A simpler way to perform analysis across multiple categories is to use a parent-level aggregation to segment the data ready for analysis.


{
    "aggregations": {
        "forces": {
            "terms": {"field": "force"},
            "aggregations": {
                "significantCrimeTypes": {
                    "significant_terms": {"field": "crime_type"}
                }
            }
        }
    }
}

And now the query via DSL:


$significantTermsAggregation = new SignificantTermsAggregation('significantCrimeTypes', 'crime_type');
$termsAggregation = new TermsAggregation('forces', 'force');
$termsAggregation->addAggregation($significantTermsAggregation);

$search = new Search();
$search->addAggregation($termsAggregation);

$queryArray = $search->toArray();

Other top level aggregations can be used to segment the data, for example, it can be segmented by geographic area to identify unusual hot-spots of a particular crime:


{
    "aggs": {
        "hotspots": {
            "geohash_grid" : {
                "field":"location",
                "precision":5,
            },
            "aggs": {
                "significantCrimeTypes": {
                    "significant_terms": {"field": "crime_type"}
                }
            }
        }
    }
}

And now via DSL:


$significantTermsAggregation = new SignificantTermsAggregation('significantCrimeTypes', 'crime_type');
$geoHashAggregation = new GeoHashGridAggregation('hotspots', 'location', 5);
$geoHashAggregation->addAggregation($significantTermsAggregation);

$search = new Search();
$search->addAggregation($geoHashAggregation);

$queryArray = $search->toArray();