Skip to content

Commit

Permalink
fix: update course model
Browse files Browse the repository at this point in the history
  • Loading branch information
Jiin Kim authored and Jiin Kim committed Jan 9, 2025
1 parent 07d7681 commit ae6016c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 33 deletions.
16 changes: 12 additions & 4 deletions app/agents/scrapers/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
ProgramModel,
CourseModel,
SessionModel,
ScheduleModel,
CourseListingModel,
Day
)

fake = Faker()

class SessionFactory(factory.DictFactory):
class ScheduleFactory(factory.DictFactory):
class Meta:
model = SessionModel
model = ScheduleModel

location = factory.LazyFunction(lambda: random.choice(["Burnaby", "Surrey", "Vancouver"]))
days = factory.LazyFunction(
lambda: random.sample([
Day.MONDAY,
Expand All @@ -43,6 +43,14 @@ class Meta:
).timestamp())
)

_CAMPUSES = ["Burnaby", "Surrey", "Vancouver"]


def _fake_room_label():
    """Return a room label of the form "Building <LETTER>-<4-digit number>"."""
    letter = fake.random_letter().upper()
    number = fake.random_int(min=1000, max=9999)
    return f"Building {letter}-{number}"


def _fake_schedules():
    """Return a list holding one or two ScheduleFactory() results."""
    count = random.randint(1, 2)
    return [ScheduleFactory() for _ in range(count)]


class SessionFactory(factory.DictFactory):
    """Sample-data factory for a session: campus, room label and schedules."""

    class Meta:
        model = SessionModel

    # Declaration order is kept so the random draws happen in the same sequence.
    campus = factory.LazyFunction(lambda: random.choice(_CAMPUSES))
    location = factory.LazyFunction(_fake_room_label)
    schedules = factory.LazyFunction(_fake_schedules)  # 1-2 schedules per session

class CourseFactory(factory.DictFactory):
class Meta:
model = CourseModel
Expand All @@ -53,7 +61,7 @@ class Meta:
)
professorName = factory.Faker('name')
credit = factory.LazyFunction(lambda: random.choice([3, 4]))
sessions = factory.LazyFunction(lambda: [SessionFactory() for _ in range(random.randint(1, 3))])
sessions = factory.LazyFunction(lambda: [SessionFactory() for _ in range(random.randint(1, 3))]) # 1-3 sessions (sections) per course

class ProgramFactory(factory.DictFactory):
class Meta:
Expand Down
89 changes: 63 additions & 26 deletions app/agents/scrapers/simon_fraser_university.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ async def fetch_courses(self) -> CourseListingModel:
continue

for course in courses:
current_course: CourseModel = {
"courseName": course.get('title'),
"courseCode": f"{program['programCode']} {course.get('text')}",
"professorName": None, # Will be updated from detail
"credit": None, # Will be updated from detail
"sessions": []
}

sections_url = f"{self.base_url}?{year}/{term}/{dept['value']}/{course['value']}"
sections = await self._fetch_json(sections_url)

Expand All @@ -157,42 +165,71 @@ async def fetch_courses(self) -> CourseListingModel:
if not detail:
continue

# Create a new course entry for each section
current_course: CourseModel = {
"courseName": course.get('title'),
"courseCode": f"{program['programCode']} {course.get('text')} {section.get('value')}", # Include section in course code
"professorName": None,
"credit": int(detail.get('units')) if detail.get('units') else None,
"sessions": []
# Update course credit and professor if not set yet
if current_course["credit"] is None and detail.get('units'):
current_course["credit"] = int(detail.get('units'))

if current_course["professorName"] is None:
instructors = detail.get('instructor', [])
if instructors:
# Get primary instructor if available, otherwise first instructor
primary_instructor = next(
(i for i in instructors if i.get('roleCode') == 'PI'),
instructors[0] if instructors else None
)
if primary_instructor:
current_course["professorName"] = primary_instructor.get('name')

# Create a new session for this section
current_session: SessionModel = {
"campus": None,
"location": None,
"schedules": []
}

# Get professor name from instructor info
instructors = detail.get('instructor', [])
if instructors:
# Get primary instructor if available, otherwise first instructor
primary_instructor = next(
(i for i in instructors if i.get('roleCode') == 'PI'),
instructors[0] if instructors else None
)
if primary_instructor:
current_course["professorName"] = primary_instructor.get('name')

schedule = detail.get('courseSchedule', [])

for block in schedule:
schedule_blocks = detail.get('courseSchedule', [])
for block in schedule_blocks:
if block.get('isExam'): # Skip exam schedules
continue

try:
session = self._resolve_schedule_block(block)
current_course["sessions"].append(session)
# Update session campus/location if not set
if current_session["campus"] is None:
current_session["campus"] = block.get('campus')

# Parse schedule block
start_date = self._parse_date_string(block.get('startDate', ''))
end_date = self._parse_date_string(block.get('endDate', ''))

# Parse time separately and combine with date
if block.get('startTime'):
start_time = datetime.strptime(block.get('startTime', ''), '%H:%M').time()
start_timestamp = int(datetime.combine(start_date.date(), start_time).timestamp())
else:
start_timestamp = None

if block.get('endTime'):
end_time = datetime.strptime(block.get('endTime', ''), '%H:%M').time()
end_timestamp = int(datetime.combine(end_date.date(), end_time).timestamp())
else:
end_timestamp = None

schedule = {
"days": self._parse_days(block.get('days')),
"startTime": start_timestamp,
"endTime": end_timestamp
}
current_session["schedules"].append(schedule)

except ValueError as e:
# Log and continue to next block
self.logger.warning(str(e))
continue

if current_session["schedules"]: # Only add sessions with schedules
current_course["sessions"].append(current_session)

if current_course["sessions"]: # Only add courses with sessions
program["courses"].append(current_course)
if current_course["sessions"]: # Only add courses with sessions
program["courses"].append(current_course)

if program["courses"]: # Only add programs with courses
programs.append(program)
Expand Down
9 changes: 6 additions & 3 deletions app/models/course.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@ class Day(str, Enum):
SATURDAY = "sat"
SUNDAY = "sun"

class SessionModel(TypedDict):
campus: Optional[str]
location: Optional[str]
class ScheduleModel(TypedDict):
    """Meeting-time entry: the days it applies to plus optional start/end times."""

    days: List[Day]  # Day enum members this entry applies to
    startTime: Optional[int]  # unix timestamp; may be None
    endTime: Optional[int]  # unix timestamp; may be None

class SessionModel(TypedDict):
    """A session of a course: where it is held and the schedules it follows."""

    campus: Optional[str]  # e.g. "Burnaby"; may be None
    location: Optional[str]  # free-form location string; may be None
    schedules: List[ScheduleModel]  # meeting-time entries belonging to this session

class CourseModel(TypedDict):
courseName: Optional[str] # e.g. "Operating Systems"
courseCode: Optional[str] # e.g. "CMPT 300 D100"
Expand Down

0 comments on commit ae6016c

Please sign in to comment.