MongoDB is not just adding and filtering data using find()
Aggregation is a way of processing a large number of documents in a collection by means of passing them through different stages.
The stages make up what is known as a pipeline. The stages in a pipeline can filter, sort, group, reshape and modify documents that pass through the pipeline
$match stage – filters those documents we need to work with, those that fit our needs
$group stage – does the aggregation job
$sort stage – sorts the resulting documents the way we require (ascending or descending)
use sampleagg;
db.states.insertMany(
[
{ "_id": "NY", "name": "New York", "population": 19453561, "rank": 4 },
{ "_id": "CA", "name": "California", "population": 39512223, "rank": 1 },
{ "_id": "TX", "name": "Texas", "population": 28995881, "rank": 2 },
{ "_id": "NJ", "name": "New Jersey", "population": 8995881, "rank": 3 }
]
)
db.universities.insertMany(
[
{ "name": "Columbia University", "stateId": "NY", "year": 2020, "enrolled": 31000, "graduated": 7000 },
{ "name": "New York University", "stateId": "NY", "year": 2020, "enrolled": 51123, "graduated": 12000 },
{ "name": "University of California, Berkeley", "stateId": "CA", "year": 2020, "enrolled": 43000, "graduated": 10000 },
{ "name": "Stanford University", "stateId": "CA", "year": 2020, "enrolled": 17400, "graduated": 4000 },
{ "name": "University of Texas at Austin", "stateId": "TX", "year": 2020, "enrolled": 51000, "graduated": 11000 },
{ "name": "Rowan University", "stateId": "NJ", "year": 2020, "enrolled": 19500, "graduated": 4300 },
{ "name": "Rutgers University", "stateId": "NJ", "year": 2020, "enrolled": 70645, "graduated": 17600 }
]
)
db.universities.find()
// Step 1
db.universities.aggregate([
{
$match: {
"year": 2020 // Focus on the year 2020
}
},
{
$project: {
_id:1,
name:1,
stateId:1,
year:1,
enrolled:1,
graduated:1
}
}
])
// Step 2
// Group by State, and do a sum of Enrolled, Graduated
// In MongoDB count is not part of Grouping.
db.universities.aggregate([
{
$match: {
"year": 2020 // Focus on the year 2020
}
},
{
$group: {
_id: "$stateId", // Group by state ID
totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled
totalGraduated: { $sum: "$graduated" }, // Sum of all graduates
universitiesCount: { $sum: 1 } // Count universities
}
},
{
$project: {
_id:1,
totalEnrolled:1,
totalGraduated:1,
universitiesCount:1
}
}
])
// Step 3
// Join with State Documents
db.universities.aggregate([
{
$match: {
"year": 2020 // Focus on the year 2020
}
},
{
$group: {
_id: "$stateId", // Group by state ID
totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled
totalGraduated: { $sum: "$graduated" }, // Sum of all graduates
universitiesCount: { $sum: 1 } // Count universities
}
},
{
$lookup:{
from:"states",
localField:"_id",
foreignField: "_id",
as:"stateDetails"
}
},
{
$project: {
_id:1,
totalEnrolled:1,
totalGraduated:1,
universitiesCount:1,
stateDetails:1
}
}
])
// Step 4
// Final Query
// is used to deconstruct the stateDetails array resulting from the $lookup,
// making it easier to work with the state details in subsequent stages.
db.universities.aggregate([
{
$match: {
"year": 2020 // Focus on the year 2020
}
},
{
$group: {
_id: "$stateId", // Group by state ID
totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled
totalGraduated: { $sum: "$graduated" }, // Sum of all graduates
universitiesCount: { $sum: 1 } // Count universities
}
},
{
$lookup:{
from:"states",
localField:"_id",
foreignField: "_id",
as:"stateDetails"
}
},
{
$unwind: "$stateDetails"
},
{
$project: {
_id: 1,
totalEnrolled:1,
totalGraduated:1,
universitiesCount:1,
"stateName": "$stateDetails.name",
"statePopulation": "$stateDetails.population",
"stateRank": "$stateDetails.rank",
}
}
])