Search This Blog

2024/04/25

MongoDb:$facet


 $facet:

Processes multiple aggregation pipelines within a single stage on the
same set of input documents. Each sub-pipeline has its own field in the
output document where its results are stored as an array of documents.

The $facet stage allows you to create multi-faceted aggregations which
characterize data across multiple dimensions, or facets, within a single
aggregation stage. Multi-faceted aggregations provide multiple filters and
categorizations to guide data browsing and analysis. Retailers commonly use
faceting to narrow search results by creating filters on product price,
manufacturer, size, etc.

Input documents are passed to the $facet stage only once. $facet enables
various aggregations on the same set of input documents, without needing to
retrieve the input documents multiple times.

We wil use teacher collection for understanding $facet.

//drop teacher collection if prexist
db.teacher.drop()

//add some sample data for teacher collection
db.teacher.insertMany([
{
name:"sagar desai",
email:"sagar.desai@example.com",
age:30,
gender:"male",
},
{
name:"sachin sawant",
email:"sagar.sawant@example.com",
age:30,
gender:"male",
},
{
name:"swara desai",
email:"swara.desai@example.com",
age:20,
gender:"female",
},
{
name:"mrunmayi sawant",
email:"mrunmayi.sawant@example.com",
age:30,
gender:"female",
},
{
name:"sandesh raorane",
email:"sandesh.raorane@example.com",
age:45,
gender:"male",
},
{
name:"Gaurav raorane",
email:"gaurav.raorane@example.com",
age:42,
gender:"male",
},
{
name:"vinayak tambe",
email:"vinayak.tambe@example.com",
age:21,
gender:"male",
},
{
name:"kiran rao",
email:"kiran.rao@example.com",
age:21,
gender:"trans-gender",
}
])


Lets first run following command
db.teacher.aggregate([
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1}
}
}
}
])

Output:
[
{ _id: 0, count: 3 },
{ _id: 30, count: 3 },
{ _id: 'GreaterThan40', count: 2 }
]

db.teacher.aggregate([
{
$group:{
_id:"$gender",
count:{"$sum":1}
}
}
])
Output:
[
{ _id: 'female', count: 2 },
{ _id: 'trans-gender', count: 1 },
{ _id: 'male', count: 5 }
]


Ouput of last two queries can be done in single query using facet.
Here is how


db.teacher.aggregate([
{
$facet:{
"facetByAge":[
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1}
}
}
}
],
"faceByGender":[{
$group:{
_id:"$gender",
count:{"$sum":1}
}
}]
}
}
])

Output:
[
{
facetByAge: [
{ _id: 0, count: 3 },
{ _id: 30, count: 3 },
{ _id: 'GreaterThan40', count: 2 }
],
faceByGender: [
{ _id: 'male', count: 5 },
{ _id: 'female', count: 2 },
{ _id: 'trans-gender', count: 1 }
]
}
]

We can get all document based on grouping by gender & age just
my small modification in our previous query.

db.teacher.aggregate([
{
$facet:{
"facetByAge":[
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1},
teachers:{$push:{"email":"$email",
            "age":"$age",name:"$name",gender:"$gender"}}
}
}
},{
$project:{
_id:1,
count:1,
teachers:1,
}
}
],
"faceByGender":[{
$group:{
_id:"$gender",
count:{"$sum":1},
teachers:{$push:{"email":"$email",
            "age":"$age",name:"$name",gender:"$gender"}}
}
}]
}
}
])

Output:
[
{
facetByAge: [
{
_id: 0,
count: 3,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
},
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
]
},
{
_id: 30,
count: 3,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
]
},
{
_id: 'GreaterThan40',
count: 2,
teachers: [
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
}
]
}
],
faceByGender: [
{
_id: 'male',
count: 5,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
}
]
},
{
_id: 'female',
count: 2,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
]
},
{
_id: 'trans-gender',
count: 1,
teachers: [
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
]
}
]
}
]

In this output

_id =0 corresponds with age < 30
_id =30 corresponds with 30 <= age < 40
_id =0 corresponds with age > 40

accordingly we will use switch case to
give that in result.Below is modified query

db.teacher.aggregate([
{
$facet:{
"facetByAge":[
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1},
teachers:{$push:{"email":"$email",
            "age":"$age",name:"$name",gender:"$gender"}}
}
}
},{
$project:{
_id:1,
count:1,
teachers:1,
"AgeGroup" :{
$switch:
{
branches: [
{
case: { $eq :["$_id", 0] },
then: "age < 30"
},
{
case: { $eq:["$_id",30] },
then: "30 <= age < 40"
}
],
default: "age > 40"
}
}
}
}
],
"faceByGender":[{
$group:{
_id:"$gender",
count:{"$sum":1},
teachers:{$push:{"email":"$email",
            "age":"$age",name:"$name",gender:"$gender"}}
}
}]
}
}
])

Output:
[
{
facetByAge: [
{
_id: 0,
count: 3,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
},
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
],
AgeGroup: 'age < 30'
},
{
_id: 30,
count: 3,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
],
AgeGroup: '30 <= age < 40'
},
{
_id: 'GreaterThan40',
count: 2,
teachers: [
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
}
],
AgeGroup: 'age > 40'
}
],
faceByGender: [
{
_id: 'male',
count: 5,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
}
]
},
{
_id: 'female',
count: 2,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
]
},
{
_id: 'trans-gender',
count: 1,
teachers: [
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
]
}
]
}
]

Usually in aggregations output of previous operation is input to
next operation,but in facet all section get same input.

MongoDb: $sortByCount operator

$sortByCount:
Groups incoming documents based on the value of a specified expression,
then computes the count of documents in each distinct group.

Each output document contains two fields: an _id field containing the
distinct grouping value, and a count field containing the number of
documents belonging to that grouping or category.

The documents are sorted by count in descending order.

db.exhibits.insertMany([
{ "_id" : 1, "title" : "The Pillars of Society", "artist" : "Grosz", "year" : 1926, "tags" : [ "painting", "satire", "Expressionism", "caricature" ] },
{ "_id" : 2, "title" : "Melancholy III", "artist" : "Munch", "year" : 1902, "tags" : [ "woodcut", "Expressionism" ] },
{ "_id" : 3, "title" : "Dancer", "artist" : "Miro", "year" : 1925, "tags" : [ "oil", "Surrealism", "painting" ] },
{ "_id" : 4, "title" : "The Great Wave off Kanagawa", "artist" : "Hokusai", "tags" : [ "woodblock", "ukiyo-e" ] },
{ "_id" : 5, "title" : "The Persistence of Memory", "artist" : "Dali", "year" : 1931, "tags" : [ "Surrealism", "painting", "oil" ] },
{ "_id" : 6, "title" : "Composition VII", "artist" : "Kandinsky", "year" : 1913, "tags" : [ "oil", "painting", "abstract" ] },
{ "_id" : 7, "title" : "The Scream", "artist" : "Munch", "year" : 1893, "tags" : [ "Expressionism", "painting", "oil" ] },
{ "_id" : 8, "title" : "Blue Flower", "artist" : "O'Keefe", "year" : 1918, "tags" : [ "abstract", "painting" ] }
])


db.exhibits.aggregate( [
{ $unwind: "$tags" }
])

Output:
[
{
_id: 1,
title: 'The Pillars of Society',
artist: 'Grosz',
year: 1926,
tags: 'painting'
},
{
_id: 1,
title: 'The Pillars of Society',
artist: 'Grosz',
year: 1926,
tags: 'satire'
},
{
_id: 1,
title: 'The Pillars of Society',
artist: 'Grosz',
year: 1926,
tags: 'Expressionism'
},
{
_id: 1,
title: 'The Pillars of Society',
artist: 'Grosz',
year: 1926,
tags: 'caricature'
},
{
_id: 2,
title: 'Melancholy III',
artist: 'Munch',
year: 1902,
tags: 'woodcut'
},
{
_id: 2,
title: 'Melancholy III',
artist: 'Munch',
year: 1902,
tags: 'Expressionism'
},
{ _id: 3, title: 'Dancer', artist: 'Miro', year: 1925, tags: 'oil' },
{
_id: 3,
title: 'Dancer',
artist: 'Miro',
year: 1925,
tags: 'Surrealism'
},
{
_id: 3,
title: 'Dancer',
artist: 'Miro',
year: 1925,
tags: 'painting'
},
{
_id: 4,
title: 'The Great Wave off Kanagawa',
artist: 'Hokusai',
tags: 'woodblock'
},
{
_id: 4,
title: 'The Great Wave off Kanagawa',
artist: 'Hokusai',
tags: 'ukiyo-e'
},
{
_id: 5,
title: 'The Persistence of Memory',
artist: 'Dali',
year: 1931,
tags: 'Surrealism'
},
{
_id: 5,
title: 'The Persistence of Memory',
artist: 'Dali',
year: 1931,
tags: 'painting'
},
{
_id: 5,
title: 'The Persistence of Memory',
artist: 'Dali',
year: 1931,
tags: 'oil'
},
{
_id: 6,
title: 'Composition VII',
artist: 'Kandinsky',
year: 1913,
tags: 'oil'
},
{
_id: 6,
title: 'Composition VII',
artist: 'Kandinsky',
year: 1913,
tags: 'painting'
},
{
_id: 6,
title: 'Composition VII',
artist: 'Kandinsky',
year: 1913,
tags: 'abstract'
},
{
_id: 7,
title: 'The Scream',
artist: 'Munch',
year: 1893,
tags: 'Expressionism'
},
{
_id: 7,
title: 'The Scream',
artist: 'Munch',
year: 1893,
tags: 'painting'
},
{
_id: 7,
title: 'The Scream',
artist: 'Munch',
year: 1893,
tags: 'oil'
}
]
Actual query:

Gives count based on tags & sort documents
descending order based on count.

db.exhibits.aggregate( [
{ $unwind: "$tags" },
{ $sortByCount: "$tags" }
])


Output:
[
{ _id: 'painting', count: 6 },
{ _id: 'oil', count: 4 },
{ _id: 'Expressionism', count: 3 },
{ _id: 'abstract', count: 2 },
{ _id: 'Surrealism', count: 2 },
{ _id: 'ukiyo-e', count: 1 },
{ _id: 'woodcut', count: 1 },
{ _id: 'woodblock', count: 1 },
{ _id: 'caricature', count: 1 },
{ _id: 'satire', count: 1 }
]