Search This Blog

2024/04/25

MongoDb:$bucket operator

Today we will explore $bucket operator in mongodb.

$bucket:
Categorizes incoming documents into groups, called buckets, based on a
specified expression and bucket boundaries and outputs a document per each
bucket. Each output document contains an _id field whose value specifies the
inclusive lower bound of the bucket. The output option specifies the fields
included in each output document.

$bucket only produces output documents for buckets that contain at least one
input document.

we will create a collection teacher for this purpose.

//drop teacher collection if exists
db.teacher.drop()

//populate teacher collection with data.
db.teacher.insertMany([
{
name:"sagar desai",
email:"sagar.desai@example.com",
age:30,
gender:"male",
},
{
name:"sachin sawant",
email:"sagar.sawant@example.com",
age:30,
gender:"male",
},
{
name:"swara desai",
email:"swara.desai@example.com",
age:20,
gender:"female",
},
{
name:"mrunmayi sawant",
email:"mrunmayi.sawant@example.com",
age:30,
gender:"female",
},
{
name:"sandesh raorane",
email:"sandesh.raorane@example.com",
age:45,
gender:"male",
},
{
name:"Gaurav raorane",
email:"gaurav.raorane@example.com",
age:42,
gender:"male",
},
{
name:"vinayak tambe",
email:"vinayak.tambe@example.com",
age:21,
gender:"male",
},
{
name:"kiran rao",
email:"kiran.rao@example.com",
age:21,
gender:"trans-gender",
}
])

This will give us counts based on bucket.
For Age :0-30 (0 included,30 excluded) ,total count of teachers
For Age :30-40 (30 included,40 excluded) ,total count of teachers
For Age :40-more(40 included,more than 40)),total count of teachers

db.teacher.aggregate([
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1}
}
}
}
])

Output:
[
{ _id: 0, count: 3 },
{ _id: 30, count: 3 },
{ _id: 'GreaterThan40', count: 2 }
]

Below query along with previous output adds emails of teacher into array email
array for each group formed by boundries

db.teacher.aggregate([
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1},
email:{$push:"$email"}
}
}
}
])

Output:
[
{
_id: 0,
count: 3,
email: [
'swara.desai@example.com',
'vinayak.tambe@example.com',
'kiran.rao@example.com'
]
},
{
_id: 30,
count: 3,
email: [
'sagar.desai@example.com',
'sagar.sawant@example.com',
'mrunmayi.sawant@example.com'
]
},
{
_id: 'GreaterThan40',
count: 2,
email: [ 'sandesh.raorane@example.com', 'gaurav.raorane@example.com' ]
}
]

Below query along with counts for each group adds total documents in teachers
array.

db.teacher.aggregate([
{
$bucket:{
groupBy:"$age",
boundaries:[0,30,40],
default:"GreaterThan40",
output:{
count:{"$sum":1},
teachers:{$push:{"email":"$email","age":"$age",
name:"$name",gender:"$gender"}}
}
}
}
])

Output:
[
{
_id: 0,
count: 3,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
},
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
]
},
{
_id: 30,
count: 3,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
]
},
{
_id: 'GreaterThan40',
count: 2,
teachers: [
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
}
]
}
]

We can not use string with boundry condition as the comparison is lexographic.

Lets do it by another way

We build this query step by step.

Step 1:
db.teacher.aggregate([
{
$group:{
_id:"$gender",
count:{"$sum":1}
}
}
])



is logically almost equivalent to

db.teacher.aggregate([
{
$group:{
_id:{"gender":"$gender"},
count:{"$sum":1}
}
}
])

Output:
[
{ _id: 'female', count: 2 },
{ _id: 'trans-gender', count: 1 },
{ _id: 'male', count: 5 }
]


Now


db.teacher.aggregate([
{
$group:{
_id:{"gender":"$gender"},
count:{"$sum":1},
teachers:{$push:{"email":"$email","age":"$age",
                name:"$name",gender:"$gender"}}
}
}
])

Output:
[
{
_id: { gender: 'female' },
count: 2,
teachers: [
{
email: 'swara.desai@example.com',
age: 20,
name: 'swara desai',
gender: 'female'
},
{
email: 'mrunmayi.sawant@example.com',
age: 30,
name: 'mrunmayi sawant',
gender: 'female'
}
]
},
{
_id: { gender: 'male' },
count: 5,
teachers: [
{
email: 'sagar.desai@example.com',
age: 30,
name: 'sagar desai',
gender: 'male'
},
{
email: 'sagar.sawant@example.com',
age: 30,
name: 'sachin sawant',
gender: 'male'
},
{
email: 'sandesh.raorane@example.com',
age: 45,
name: 'sandesh raorane',
gender: 'male'
},
{
email: 'gaurav.raorane@example.com',
age: 42,
name: 'Gaurav raorane',
gender: 'male'
},
{
email: 'vinayak.tambe@example.com',
age: 21,
name: 'vinayak tambe',
gender: 'male'
}
]
},
{
_id: { gender: 'trans-gender' },
count: 1,
teachers: [
{
email: 'kiran.rao@example.com',
age: 21,
name: 'kiran rao',
gender: 'trans-gender'
}
]
}
]

No comments:

Post a Comment