Aggregation Pipeline

MongoDB is not just adding and filtering data using find()

Aggregation is a way of processing a large number of documents in a collection by means of passing them through different stages.

The stages make up what is known as a pipeline. The stages in a pipeline can filter, sort, group, reshape and modify documents that pass through the pipeline

$match stage – filters those documents we need to work with, those that fit our needs
$group stage – does the aggregation job
$sort stage – sorts the resulting documents the way we require (ascending or descending)

use sampleagg;

db.states.insertMany(
[
  { "_id": "NY", "name": "New York", "population": 19453561, "rank": 4 },
  { "_id": "CA", "name": "California", "population": 39512223, "rank": 1 },
  { "_id": "TX", "name": "Texas", "population": 28995881, "rank": 2 },
  { "_id": "NJ", "name": "New Jersey", "population": 8995881, "rank": 3 }
]
)

db.universities.insertMany(
[
  { "name": "Columbia University", "stateId": "NY", "year": 2020, "enrolled": 31000, "graduated": 7000 },
  { "name": "New York University", "stateId": "NY", "year": 2020, "enrolled": 51123, "graduated": 12000 },
  { "name": "University of California, Berkeley", "stateId": "CA", "year": 2020, "enrolled": 43000, "graduated": 10000 },
  { "name": "Stanford University", "stateId": "CA", "year": 2020, "enrolled": 17400, "graduated": 4000 },
  { "name": "University of Texas at Austin", "stateId": "TX", "year": 2020, "enrolled": 51000, "graduated": 11000 },
  { "name": "Rowan University", "stateId": "NJ", "year": 2020, "enrolled": 19500, "graduated": 4300 },
  { "name": "Rutgers University", "stateId": "NJ", "year": 2020, "enrolled": 70645, "graduated": 17600 }
]
)


db.universities.find()

// Step 1
db.universities.aggregate([
  {
    $match: {
      "year": 2020  // Focus on the year 2020
    }
  },
  
  {
     $project: {
     _id:1,
     name:1,
     stateId:1,
     year:1,
     enrolled:1,
     graduated:1
    }
  }
])

// Step 2

// Group by State, and do a sum of Enrolled, Graduated
// In MongoDB count is not part of Grouping.

db.universities.aggregate([
  {
    $match: {
      "year": 2020  // Focus on the year 2020
    }
  },
  {
    $group: {
      _id: "$stateId",  // Group by state ID
      totalEnrolled: { $sum: "$enrolled" },  // Sum of all students enrolled
      totalGraduated: { $sum: "$graduated" },  // Sum of all graduates
      universitiesCount: { $sum: 1 }  // Count universities
    }
  },
  
  {
     $project: {
     _id:1,
     totalEnrolled:1,
     totalGraduated:1,
     universitiesCount:1
    }
  }
])

// Step 3

// Join with State Documents

db.universities.aggregate([
  {
    $match: {
      "year": 2020  // Focus on the year 2020
    }
  },
  {
    $group: {
      _id: "$stateId",  // Group by state ID
      totalEnrolled: { $sum: "$enrolled" },  // Sum of all students enrolled
      totalGraduated: { $sum: "$graduated" },  // Sum of all graduates
      universitiesCount: { $sum: 1 }  // Count universities
    }
  },
  {
      $lookup:{
          from:"states",
          localField:"_id",
          foreignField: "_id",
          as:"stateDetails"
      }
  },
  {
     $project: {
     _id:1,
     totalEnrolled:1,
     totalGraduated:1,
     universitiesCount:1,
     stateDetails:1
    }
  }
])


// Step 4
// Final Query
// is used to deconstruct the stateDetails array resulting from the $lookup, 
// making it easier to work with the state details in subsequent stages.

db.universities.aggregate([
  {
    $match: {
      "year": 2020  // Focus on the year 2020
    }
  },
 
  {
    $group: {
      _id: "$stateId",  // Group by state ID
      totalEnrolled: { $sum: "$enrolled" },  // Sum of all students enrolled
      totalGraduated: { $sum: "$graduated" },  // Sum of all graduates
      universitiesCount: { $sum: 1 }  // Count universities
    }
  },
  {
      $lookup:{
          from:"states",
          localField:"_id",
          foreignField: "_id",
          as:"stateDetails"
      }
  },
  {
      $unwind: "$stateDetails"
  },
  {
     $project: {
      _id: 1,
      totalEnrolled:1,
      totalGraduated:1,
      universitiesCount:1,
      "stateName": "$stateDetails.name",
      "statePopulation": "$stateDetails.population",
      "stateRank": "$stateDetails.rank",
    }
  }
])

PreviousOperators NextFurther Reading

Last updated 1 year ago

use sampleagg; db.states.insertMany( [ { "_id": "NY", "name": "New York", "population": 19453561, "rank": 4 }, { "_id": "CA", "name": "California", "population": 39512223, "rank": 1 }, { "_id": "TX", "name": "Texas", "population": 28995881, "rank": 2 }, { "_id": "NJ", "name": "New Jersey", "population": 8995881, "rank": 3 } ] ) db.universities.insertMany( [ { "name": "Columbia University", "stateId": "NY", "year": 2020, "enrolled": 31000, "graduated": 7000 }, { "name": "New York University", "stateId": "NY", "year": 2020, "enrolled": 51123, "graduated": 12000 }, { "name": "University of California, Berkeley", "stateId": "CA", "year": 2020, "enrolled": 43000, "graduated": 10000 }, { "name": "Stanford University", "stateId": "CA", "year": 2020, "enrolled": 17400, "graduated": 4000 }, { "name": "University of Texas at Austin", "stateId": "TX", "year": 2020, "enrolled": 51000, "graduated": 11000 }, { "name": "Rowan University", "stateId": "NJ", "year": 2020, "enrolled": 19500, "graduated": 4300 }, { "name": "Rutgers University", "stateId": "NJ", "year": 2020, "enrolled": 70645, "graduated": 17600 } ] ) db.universities.find() // Step 1 db.universities.aggregate([ { $match: { "year": 2020 // Focus on the year 2020 } }, { $project: { _id:1, name:1, stateId:1, year:1, enrolled:1, graduated:1 } } ]) // Step 2 // Group by State, and do a sum of Enrolled, Graduated // In MongoDB count is not part of Grouping. db.universities.aggregate([ { $match: { "year": 2020 // Focus on the year 2020 } }, { $group: { _id: "$stateId", // Group by state ID totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled totalGraduated: { $sum: "$graduated" }, // Sum of all graduates universitiesCount: { $sum: 1 } // Count universities } }, { $project: { _id:1, totalEnrolled:1, totalGraduated:1, universitiesCount:1 } } ]) // Step 3 // Join with State Documents db.universities.aggregate([ { $match: { "year": 2020 // Focus on the year 2020 } }, { $group: { _id: "$stateId", // Group by state ID totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled totalGraduated: { $sum: "$graduated" }, // Sum of all graduates universitiesCount: { $sum: 1 } // Count universities } }, { $lookup:{ from:"states", localField:"_id", foreignField: "_id", as:"stateDetails" } }, { $project: { _id:1, totalEnrolled:1, totalGraduated:1, universitiesCount:1, stateDetails:1 } } ]) // Step 4 // Final Query // is used to deconstruct the stateDetails array resulting from the $lookup, // making it easier to work with the state details in subsequent stages. db.universities.aggregate([ { $match: { "year": 2020 // Focus on the year 2020 } }, { $group: { _id: "$stateId", // Group by state ID totalEnrolled: { $sum: "$enrolled" }, // Sum of all students enrolled totalGraduated: { $sum: "$graduated" }, // Sum of all graduates universitiesCount: { $sum: 1 } // Count universities } }, { $lookup:{ from:"states", localField:"_id", foreignField: "_id", as:"stateDetails" } }, { $unwind: "$stateDetails" }, { $project: { _id: 1, totalEnrolled:1, totalGraduated:1, universitiesCount:1, "stateName": "$stateDetails.name", "statePopulation": "$stateDetails.population", "stateRank": "$stateDetails.rank", } } ])