diff --git a/.gitignore b/.gitignore index a6a1d1f..132e021 100644 --- a/.gitignore +++ b/.gitignore @@ -43,7 +43,6 @@ deploy_log.sh .vscode/ .firebase/ /api-tools -/qodana.yaml # output data and logs data/ diff --git a/build.bat b/build.bat index 5eb076f..16a12f3 100644 --- a/build.bat +++ b/build.bat @@ -24,8 +24,8 @@ echo Performing checks... go mod tidy && ^ go vet ./... && ^ staticcheck ./... && ^ -gofmt -w ./.. && ^ -goimports -w ./.. +gofmt -w . && ^ +goimports -w . if ERRORLEVEL 1 exit /b %ERRORLEVEL% :: fail if error occurred echo Checks done! if %skip%==1 exit diff --git a/parser/cometCalendarParser_test.go b/parser/cometCalendarParser_test.go index 7f60cab..04e922c 100644 --- a/parser/cometCalendarParser_test.go +++ b/parser/cometCalendarParser_test.go @@ -1,396 +1,396 @@ -package parser - -import ( - "encoding/json" - "os" - "path/filepath" - "slices" - "testing" - "time" - - "go.mongodb.org/mongo-driver/bson/primitive" - - "github.com/UTDNebula/nebula-api/api/schema" - "github.com/google/go-cmp/cmp" -) - -// helper function to read JSON files into Go structs for testing purposes -func readJSONFile[T any](t *testing.T, path string) T { - t.Helper() - - var result T - - data, err := os.ReadFile(path) - if err != nil { - t.Fatalf("failed to read file %q: %v", path, err) - } - - if err := json.Unmarshal(data, &result); err != nil { - t.Fatalf("failed to unmarshal JSON from file %q: %v", path, err) - } - - return result -} - -// helper function to write Go structs to JSON files for testing purposes -func writeJSONFile(t *testing.T, path string, value any) { - t.Helper() - - data, err := json.Marshal(value) - if err != nil { - t.Fatalf("failed to marshal value to JSON: %v", err) - } - - if err := os.WriteFile(path, data, 0o777); err != nil { - t.Fatalf("failed to write file %q: %v", path, err) - } -} - -// helper function to create a pointer to a string to match expected struct field -func strPtr(s string) *string { - return &s -} - -// helper function to create valid schema.Event object for testing purposes -func makeEvent(summary, location string, startTime time.Time) schema.Event { - return schema.Event{ - Id: primitive.NewObjectID(), - Summary: summary, - Location: location, - StartTime: startTime, - EndTime: startTime.Add(time.Hour), - } -} - -// helper function to find a specific date entry in the parser result and fail if it is missing -func findDate( - t *testing.T, - result []schema.MultiBuildingEvents[schema.Event], - date string, -) *schema.MultiBuildingEvents[schema.Event] { - t.Helper() - - for i := range result { - if result[i].Date == date { - return &result[i] - } - } - - t.Fatalf("date %q not found in result", date) - return nil -} - -// helper function to find a specific building entry under a date and fail if it is missing -func findBuilding( - t *testing.T, - dateEntry *schema.MultiBuildingEvents[schema.Event], - building string, -) *schema.SingleBuildingEvents[schema.Event] { - t.Helper() - - for i := range dateEntry.Buildings { - if dateEntry.Buildings[i].Building == building { - return &dateEntry.Buildings[i] - } - } - - t.Fatalf("building %q not found under date %q", building, dateEntry.Date) - return nil -} - -// helper function to find a specific room entry under a building and fail if it is missing -func findRoom( - t *testing.T, - buildingEntry *schema.SingleBuildingEvents[schema.Event], - room string, -) *schema.RoomEvents[schema.Event] { - t.Helper() - - for i := range buildingEntry.Rooms { - if buildingEntry.Rooms[i].Room == room { - return &buildingEntry.Rooms[i] - } - } - - t.Fatalf("room %q not found under building %q", room, buildingEntry.Building) - return nil -} - -// helper function to create a sample set of map locations for testing purposes -func testMapLocations() []schema.MapBuilding { - return []schema.MapBuilding{ - { - Name: strPtr("Engineering and Computer Science South (ECSS)"), - Acronym: strPtr("ECSS"), - }, - { - Name: strPtr("Jonsson Performance Hall (JO)"), - Acronym: strPtr("JO"), - }, - } -} - -// tests that getLocationAbbreviations correctly reads the mapLocations.json file and returns the expected building abbreviations and valid abbreviation list -func TestGetLocationAbbreviations_Success(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir) - if err != nil { - t.Fatalf("getLocationAbbreviations returned an error: %v", err) - } - - if got := buildingAbbreviations["Engineering and Computer Science South"]; got != "ECSS" { - t.Fatalf("expected Engineering and Computer Science South -> ECSS, got %q", got) - } - - if got := buildingAbbreviations["Jonsson Performance Hall"]; got != "JO" { - t.Fatalf("expected Jonsson Performance Hall -> JO, got %q", got) - } - - if !slices.Contains(validAbbreviations, "ECSS") { - t.Fatalf("expected validAbbreviations to contain ECSS, got %v", validAbbreviations) - } - - if !slices.Contains(validAbbreviations, "JO") { - t.Fatalf("expected validAbbreviations to contain JO, got %v", validAbbreviations) - } -} - -// Tests that if a building has no acronym, an empty string is used as the abbreviation and is included in the validAbbreviations list -func TestGetLocationAbbreviations_NoAcronym(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - - locations := []schema.MapBuilding{ - { - Name: strPtr("Unknown Building"), - }, - } - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), locations) - - buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir) - if err != nil { - t.Fatalf("getLocationAbbreviations returned an error: %v", err) - } - - if got := buildingAbbreviations["Unknown Building"]; got != "" { - t.Fatalf("expected empty-string abbreviation for building with no acronym, got %q", got) - } - - if !slices.Contains(validAbbreviations, "") { - t.Fatalf("expected validAbbreviations to contain empty string, got %v", validAbbreviations) - } -} - -// Tests that getLocationAbbreviations returns an error when mapLocations.json contains invalid JSON. -func TestGetLocationAbbreviations_InvalidJSON(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - - if err := os.WriteFile(filepath.Join(inDir, "mapLocations.json"), []byte("invalid json"), 0o777); err != nil { - t.Fatalf("failed to write invalid json fixture: %v", err) - } - - _, _, err := getLocationAbbreviations(inDir) - if err == nil { - t.Fatalf("expected error for invalid mapLocations.json, got nil") - } -} - -// Tests that ParseCometCalendar correctly processes a single event and stores it in the expected location in the output JSON structure. -func TestParseCometCalendar_ParsesAbbreviationAndRoom(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start := time.Date(2026, 3, 14, 9, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("Test Event", "ECSS 2.415", start), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "ECSS") - roomEntry := findRoom(t, buildingEntry, "2.415") - - if len(roomEntry.Events) != 1 { - t.Fatalf("expected 1 event in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) - } - - if diff := cmp.Diff(events[0].Summary, roomEntry.Events[0].Summary); diff != "" { - t.Fatalf("unexpected event stored in room (-want +got);\n%s", diff) - } -} - -// Tests that ParseCometCalendar correctly resolves full building names to their abbreviations. -func TestParseCometCalendar_FallsBackToFullBuildingName(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start := time.Date(2026, 3, 14, 10, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("Full Building Name Event", "Engineering and Computer Science South 2.415", start), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "ECSS") - roomEntry := findRoom(t, buildingEntry, "2.415") - - if len(roomEntry.Events) != 1 { - t.Fatalf("expected 1 event in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) - } -} - -// Test that if an event has a location that does not match any known building name or abbreviation, that it is categorized as "Other" -func TestParseCometCalendar_UsesOtherForUnknownLocation(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start := time.Date(2026, 3, 14, 11, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("Unknown Location Event", "Off Campus Location", start), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "Other") - roomEntry := findRoom(t, buildingEntry, "Other") - - if len(roomEntry.Events) != 1 { - t.Fatalf("expected 1 event in Other/Other on 2026-03-14, got %d", len(roomEntry.Events)) - } -} - -// Tests that if multiple events occur in the same building/room on the same day, they are grouped together in the JSON output structure -func TestParseCometCalendar_GroupsEventsByDateBuildingRoom(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start1 := time.Date(2026, 3, 14, 9, 0, 0, 0, time.UTC) - start2 := time.Date(2026, 3, 14, 13, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("First Grouped Event", "ECSS 2.415", start1), - makeEvent("Second Grouped Event", "ECSS 2.415", start2), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "ECSS") - roomEntry := findRoom(t, buildingEntry, "2.415") - - if len(roomEntry.Events) != 2 { - t.Fatalf("expected 2 events in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) - } -} - -// Tests that ParseCometCalendar uses the comma-separated fallback to extract the room when the building is valid and no room was otherwise found. -func TestParseCometCalendar_UsesCommaSeparatedFallbackRoom(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start := time.Date(2026, 3, 14, 12, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("Conference Room Event", "ECSS, Conference Room", start), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "ECSS") - roomEntry := findRoom(t, buildingEntry, "Conference Room") - - if len(roomEntry.Events) != 1 { - t.Fatalf("expected 1 event in the ECSS/Conference Room, got %d", len(roomEntry.Events)) - } -} - -// Tests that if an event has an empty location, it is categorized as "Other" -func TestParseCometCalendar_UsesOtherForEmptyLocation(t *testing.T) { - t.Parallel() - - inDir := t.TempDir() - outDir := t.TempDir() - - writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) - - start := time.Date(2026, 3, 14, 14, 0, 0, 0, time.UTC) - events := []schema.Event{ - makeEvent("Empty Location Event", "", start), - } - writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) - - ParseCometCalendar(inDir, outDir) - - result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( - t, - filepath.Join(outDir, "cometCalendar.json"), - ) - - dateEntry := findDate(t, result, "2026-03-14") - buildingEntry := findBuilding(t, dateEntry, "Other") - roomEntry := findRoom(t, buildingEntry, "Other") - - if len(roomEntry.Events) != 1 { - t.Fatalf("expected 1 event in Other/Other, got %d", len(roomEntry.Events)) - } -} +package parser + +import ( + "encoding/json" + "os" + "path/filepath" + "slices" + "testing" + "time" + + "go.mongodb.org/mongo-driver/bson/primitive" + + "github.com/UTDNebula/nebula-api/api/schema" + "github.com/google/go-cmp/cmp" +) + +// helper function to read JSON files into Go structs for testing purposes +func readJSONFile[T any](t *testing.T, path string) T { + t.Helper() + + var result T + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read file %q: %v", path, err) + } + + if err := json.Unmarshal(data, &result); err != nil { + t.Fatalf("failed to unmarshal JSON from file %q: %v", path, err) + } + + return result +} + +// helper function to write Go structs to JSON files for testing purposes +func writeJSONFile(t *testing.T, path string, value any) { + t.Helper() + + data, err := json.Marshal(value) + if err != nil { + t.Fatalf("failed to marshal value to JSON: %v", err) + } + + if err := os.WriteFile(path, data, 0o777); err != nil { + t.Fatalf("failed to write file %q: %v", path, err) + } +} + +// helper function to create a pointer to a string to match expected struct field +func strPtr(s string) *string { + return &s +} + +// helper function to create valid schema.Event object for testing purposes +func makeEvent(summary, location string, startTime time.Time) schema.Event { + return schema.Event{ + Id: primitive.NewObjectID(), + Summary: summary, + Location: location, + StartTime: startTime, + EndTime: startTime.Add(time.Hour), + } +} + +// helper function to find a specific date entry in the parser result and fail if it is missing +func findDate( + t *testing.T, + result []schema.MultiBuildingEvents[schema.Event], + date string, +) *schema.MultiBuildingEvents[schema.Event] { + t.Helper() + + for i := range result { + if result[i].Date == date { + return &result[i] + } + } + + t.Fatalf("date %q not found in result", date) + return nil +} + +// helper function to find a specific building entry under a date and fail if it is missing +func findBuilding( + t *testing.T, + dateEntry *schema.MultiBuildingEvents[schema.Event], + building string, +) *schema.SingleBuildingEvents[schema.Event] { + t.Helper() + + for i := range dateEntry.Buildings { + if dateEntry.Buildings[i].Building == building { + return &dateEntry.Buildings[i] + } + } + + t.Fatalf("building %q not found under date %q", building, dateEntry.Date) + return nil +} + +// helper function to find a specific room entry under a building and fail if it is missing +func findRoom( + t *testing.T, + buildingEntry *schema.SingleBuildingEvents[schema.Event], + room string, +) *schema.RoomEvents[schema.Event] { + t.Helper() + + for i := range buildingEntry.Rooms { + if buildingEntry.Rooms[i].Room == room { + return &buildingEntry.Rooms[i] + } + } + + t.Fatalf("room %q not found under building %q", room, buildingEntry.Building) + return nil +} + +// helper function to create a sample set of map locations for testing purposes +func testMapLocations() []schema.MapBuilding { + return []schema.MapBuilding{ + { + Name: strPtr("Engineering and Computer Science South (ECSS)"), + Acronym: strPtr("ECSS"), + }, + { + Name: strPtr("Jonsson Performance Hall (JO)"), + Acronym: strPtr("JO"), + }, + } +} + +// tests that getLocationAbbreviations correctly reads the mapLocations.json file and returns the expected building abbreviations and valid abbreviation list +func TestGetLocationAbbreviations_Success(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir) + if err != nil { + t.Fatalf("getLocationAbbreviations returned an error: %v", err) + } + + if got := buildingAbbreviations["Engineering and Computer Science South"]; got != "ECSS" { + t.Fatalf("expected Engineering and Computer Science South -> ECSS, got %q", got) + } + + if got := buildingAbbreviations["Jonsson Performance Hall"]; got != "JO" { + t.Fatalf("expected Jonsson Performance Hall -> JO, got %q", got) + } + + if !slices.Contains(validAbbreviations, "ECSS") { + t.Fatalf("expected validAbbreviations to contain ECSS, got %v", validAbbreviations) + } + + if !slices.Contains(validAbbreviations, "JO") { + t.Fatalf("expected validAbbreviations to contain JO, got %v", validAbbreviations) + } +} + +// Tests that if a building has no acronym, an empty string is used as the abbreviation and is included in the validAbbreviations list +func TestGetLocationAbbreviations_NoAcronym(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + + locations := []schema.MapBuilding{ + { + Name: strPtr("Unknown Building"), + }, + } + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), locations) + + buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir) + if err != nil { + t.Fatalf("getLocationAbbreviations returned an error: %v", err) + } + + if got := buildingAbbreviations["Unknown Building"]; got != "" { + t.Fatalf("expected empty-string abbreviation for building with no acronym, got %q", got) + } + + if !slices.Contains(validAbbreviations, "") { + t.Fatalf("expected validAbbreviations to contain empty string, got %v", validAbbreviations) + } +} + +// Tests that getLocationAbbreviations returns an error when mapLocations.json contains invalid JSON. +func TestGetLocationAbbreviations_InvalidJSON(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + + if err := os.WriteFile(filepath.Join(inDir, "mapLocations.json"), []byte("invalid json"), 0o777); err != nil { + t.Fatalf("failed to write invalid json fixture: %v", err) + } + + _, _, err := getLocationAbbreviations(inDir) + if err == nil { + t.Fatalf("expected error for invalid mapLocations.json, got nil") + } +} + +// Tests that ParseCometCalendar correctly processes a single event and stores it in the expected location in the output JSON structure. +func TestParseCometCalendar_ParsesAbbreviationAndRoom(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start := time.Date(2026, 3, 14, 9, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("Test Event", "ECSS 2.415", start), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "ECSS") + roomEntry := findRoom(t, buildingEntry, "2.415") + + if len(roomEntry.Events) != 1 { + t.Fatalf("expected 1 event in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) + } + + if diff := cmp.Diff(events[0].Summary, roomEntry.Events[0].Summary); diff != "" { + t.Fatalf("unexpected event stored in room (-want +got);\n%s", diff) + } +} + +// Tests that ParseCometCalendar correctly resolves full building names to their abbreviations. +func TestParseCometCalendar_FallsBackToFullBuildingName(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start := time.Date(2026, 3, 14, 10, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("Full Building Name Event", "Engineering and Computer Science South 2.415", start), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "ECSS") + roomEntry := findRoom(t, buildingEntry, "2.415") + + if len(roomEntry.Events) != 1 { + t.Fatalf("expected 1 event in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) + } +} + +// Test that if an event has a location that does not match any known building name or abbreviation, that it is categorized as "Other" +func TestParseCometCalendar_UsesOtherForUnknownLocation(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start := time.Date(2026, 3, 14, 11, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("Unknown Location Event", "Off Campus Location", start), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "Other") + roomEntry := findRoom(t, buildingEntry, "Other") + + if len(roomEntry.Events) != 1 { + t.Fatalf("expected 1 event in Other/Other on 2026-03-14, got %d", len(roomEntry.Events)) + } +} + +// Tests that if multiple events occur in the same building/room on the same day, they are grouped together in the JSON output structure +func TestParseCometCalendar_GroupsEventsByDateBuildingRoom(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start1 := time.Date(2026, 3, 14, 9, 0, 0, 0, time.UTC) + start2 := time.Date(2026, 3, 14, 13, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("First Grouped Event", "ECSS 2.415", start1), + makeEvent("Second Grouped Event", "ECSS 2.415", start2), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "ECSS") + roomEntry := findRoom(t, buildingEntry, "2.415") + + if len(roomEntry.Events) != 2 { + t.Fatalf("expected 2 events in ECSS/2.415 on 2026-03-14, got %d", len(roomEntry.Events)) + } +} + +// Tests that ParseCometCalendar uses the comma-separated fallback to extract the room when the building is valid and no room was otherwise found. +func TestParseCometCalendar_UsesCommaSeparatedFallbackRoom(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start := time.Date(2026, 3, 14, 12, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("Conference Room Event", "ECSS, Conference Room", start), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "ECSS") + roomEntry := findRoom(t, buildingEntry, "Conference Room") + + if len(roomEntry.Events) != 1 { + t.Fatalf("expected 1 event in the ECSS/Conference Room, got %d", len(roomEntry.Events)) + } +} + +// Tests that if an event has an empty location, it is categorized as "Other" +func TestParseCometCalendar_UsesOtherForEmptyLocation(t *testing.T) { + t.Parallel() + + inDir := t.TempDir() + outDir := t.TempDir() + + writeJSONFile(t, filepath.Join(inDir, "mapLocations.json"), testMapLocations()) + + start := time.Date(2026, 3, 14, 14, 0, 0, 0, time.UTC) + events := []schema.Event{ + makeEvent("Empty Location Event", "", start), + } + writeJSONFile(t, filepath.Join(inDir, "cometCalendarScraped.json"), events) + + ParseCometCalendar(inDir, outDir) + + result := readJSONFile[[]schema.MultiBuildingEvents[schema.Event]]( + t, + filepath.Join(outDir, "cometCalendar.json"), + ) + + dateEntry := findDate(t, result, "2026-03-14") + buildingEntry := findBuilding(t, dateEntry, "Other") + roomEntry := findRoom(t, buildingEntry, "Other") + + if len(roomEntry.Events) != 1 { + t.Fatalf("expected 1 event in Other/Other, got %d", len(roomEntry.Events)) + } +} diff --git a/parser/courseParser.go b/parser/courseParser.go index 7d59baf..5bacd98 100644 --- a/parser/courseParser.go +++ b/parser/courseParser.go @@ -19,6 +19,9 @@ var ( // contactRegexp matches the contact hours and offering frequency from the course description // (e.g. "(12-34) SUS") contactRegexp = regexp.MustCompile(`\(([0-9]+)-([0-9]+)\)\s+([SUFY]+)`) + + // descriptionWhitespaceRegexp matches one or more whitespace characters (including newlines) + descriptionWhitespaceRegexp = regexp.MustCompile(`\s+`) ) // parseCourse returns a pointer to the course specified by the @@ -50,15 +53,18 @@ func parseCourse(internalCourseNumber string, session schema.AcademicSession, ro // This function does not modify any global state. // Returns a pointer to the newly created schema.Course object. func getCourse(internalCourseNumber string, session schema.AcademicSession, rowInfo map[string]*goquery.Selection, classInfo map[string]string) *schema.Course { - CoursePrefix, CourseNumber := getPrefixAndNumber(classInfo) + coursePrefix, courseNumber := getPrefixAndNumber(classInfo) + + description := descriptionWhitespaceRegexp.ReplaceAllString(rowInfo["Description:"].Text(), " ") + school := descriptionWhitespaceRegexp.ReplaceAllString(rowInfo["College:"].Text(), " ") course := schema.Course{ Id: primitive.NewObjectID(), - Course_number: CourseNumber, - Subject_prefix: CoursePrefix, + Course_number: courseNumber, + Subject_prefix: coursePrefix, Title: utils.TrimWhitespace(rowInfo["Course Title:"].Text()), - Description: utils.TrimWhitespace(rowInfo["Description:"].Text()), - School: utils.TrimWhitespace(rowInfo["College:"].Text()), + Description: utils.TrimWhitespace(description), + School: utils.TrimWhitespace(school), Credit_hours: classInfo["Semester Credit Hours:"], Class_level: classInfo["Class Level:"], Activity_type: classInfo["Activity Type:"], diff --git a/parser/gradeLoader.go b/parser/gradeLoader.go index cff86a3..ba51876 100644 --- a/parser/gradeLoader.go +++ b/parser/gradeLoader.go @@ -5,79 +5,64 @@ import ( "fmt" "log" "os" - "path/filepath" + "regexp" "strconv" "strings" -) -var grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"} + "github.com/UTDNebula/api-tools/utils" +) -func loadGrades(csvDir string) map[string]map[string][]int { +var ( + grades = []string{"A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F", "W", "P", "CR", "NC", "I"} + optionalColumns = []string{"W", "P", "CR", "NC", "I"} + requiredColumns = []string{"Section", "Subject", "Catalog Number", "A+"} + semesterRegex = regexp.MustCompile(`[1-9][0-9][USF]`) +) +func loadGrades(csvDir string) (map[string]map[string][]int, error) { // MAP[SEMESTER] -> MAP[SUBJECT + NUMBER + SECTION] -> GRADE DISTRIBUTION gradeMap := make(map[string]map[string][]int) - if csvDir == "" { - log.Print("No grade data CSV directory specified. Grade data will not be included.") - return gradeMap - } - - dirPtr, err := os.Open(csvDir) - if err != nil { - panic(err) - } - defer dirPtr.Close() + fileNames := utils.GetAllFilesWithExtension(csvDir, ".csv") + for _, name := range fileNames { - csvFiles, err := dirPtr.ReadDir(-1) - if err != nil { - panic(err) - } - - for _, csvEntry := range csvFiles { - - if csvEntry.IsDir() { - continue + semester := semesterRegex.FindString(name) + if semester == "" { + return gradeMap, fmt.Errorf("invalid name %s, must match format {>10}{F,S,U} i.e. 22F", name) } - csvPath := fmt.Sprintf("%s/%s", csvDir, csvEntry.Name()) - - csvFile, err := os.Open(csvPath) + var err error + gradeMap[semester], err = csvToMap(name) if err != nil { - panic(err) - } - defer csvFile.Close() - - // Create logs directory - if _, err := os.Stat("./logs/grades"); err != nil { - os.Mkdir("./logs/grades", os.ModePerm) + return gradeMap, fmt.Errorf("error parsing %s: %v", name, err) } + } - // Create log file [name of csv].log in logs directory - basePath := filepath.Base(csvPath) - csvName := strings.TrimSuffix(basePath, filepath.Ext(basePath)) - logFile, err := os.Create("./logs/grades/" + csvName + ".log") + return gradeMap, nil +} - if err != nil { - log.Panic("Could not create CSV log file.") +func csvToMap(filename string) (map[string][]int, error) { + file, err := os.Open(filename) + if err != nil { + return nil, fmt.Errorf("error opening CSV file '%s': %v", filename, err) + } + defer func(file *os.File) { + if err := file.Close(); err != nil { + log.Printf("failed to close file '%s': %v", filename, err) } - defer logFile.Close() + }(file) - // Put data from csv into map - gradeMap[csvName] = csvToMap(csvFile, logFile) + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, fmt.Errorf("error parsing %s: %v", filename, err) } - return gradeMap -} - -func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { - reader := csv.NewReader(csvFile) - records, err := reader.ReadAll() // records is [][]strings - if err != nil { - log.Panicf("Error parsing %s: %s", csvFile.Name(), err.Error()) + if len(records) == 0 { + return nil, fmt.Errorf("empty CSV file '%s'", filename) } indexMap := make(map[string]int) - for j, col := range records[0] { switch col { case "Catalog Number", "Catalog Nbr": @@ -89,18 +74,15 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { } } - // required columns - for _, name := range []string{"Section", "Subject", "Catalog Number", "A+"} { + for _, name := range requiredColumns { if _, ok := indexMap[name]; !ok { - fmt.Fprintf(logFile, "could not find %s column", name) - log.Panicf("could not find %s column", name) + return nil, fmt.Errorf("could not find %s column in %s", name, filename) } } - // optional columns - for _, name := range []string{"W", "P", "CR", "NC", "I"} { + for _, name := range optionalColumns { if _, ok := indexMap[name]; !ok { - fmt.Fprintf(logFile, "could not find %s column\n", name) + utils.VPrintf("could not find %s column in %s", name, filename) } } @@ -109,7 +91,6 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { catalogNumberCol := indexMap["Catalog Number"] distroMap := make(map[string][]int) - for _, record := range records[1:] { // convert grade distribution from string to int intSlice := make([]int, len(grades)) @@ -125,5 +106,6 @@ func csvToMap(csvFile *os.File, logFile *os.File) map[string][]int { distroKey := record[subjectCol] + record[catalogNumberCol] + trimmedSectionNumber distroMap[distroKey] = intSlice[:] } - return distroMap + + return distroMap, nil } diff --git a/parser/gradeLoader_test.go b/parser/gradeLoader_test.go new file mode 100644 index 0000000..efb3e1c --- /dev/null +++ b/parser/gradeLoader_test.go @@ -0,0 +1,134 @@ +package parser + +import ( + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" +) + +var ( + gradeLoaderTestCases = map[string]struct { + csvContent string + want map[string][]int + fail bool + }{ + "Valid_Data": { + csvContent: `Instructor 1,Instructor 2,Instructor 3,Instructor 4,Instructor 5,Instructor 6,Subject,"Catalog Nbr",Section,A+,A,A-,B+,B,B-,C+,C,C-,D+,D,D-,F,NF,CR,I,NC,P,W +"Curchack, Fred",,,,,,AP,3300,501,6,4,2,2,1,3,1,1,,,,,1,,,,,,0 +"Anjum, Zafar",,,,,,ARAB,1311,001,,26,,,1,,,,,,,,,,,,,,2`, + want: map[string][]int{ + "AP3300501": {6, 4, 2, 2, 1, 3, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, + "ARAB13111": {0, 26, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0}, + }, + fail: false, + }, + "Missing_Required_Column_A+": { + csvContent: `Subject,"Catalog Nbr",Section,A,A-,B+ +CS,1337,001,10,5,5`, + fail: true, + }, + "Missing_Required_Column_Subject": { + csvContent: `Instructor,"Catalog Nbr",Section,A+,A +Doe,1337,001,10,5`, + fail: true, + }, + "Empty_File": { + csvContent: ``, + fail: true, + }, + } +) + +func TestLoadGrades(t *testing.T) { + + invalidCSVNames := []string{"22", "2F", "2022F", "20-U", "15Fall"} + + for i, name := range invalidCSVNames { + t.Run( + fmt.Sprintf("Invalid_CSV_Name_%d", i), func(t *testing.T) { + tempDir := t.TempDir() + + temp, err := os.Create(filepath.Join(tempDir, name+".csv")) + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + _, err = loadGrades(tempDir) + if err == nil { + t.Errorf("expected error but got none") + } + }, + ) + } + + validCSVNames := []string{"25F", "18U", "26S"} + for i, name := range validCSVNames { + t.Run( + fmt.Sprintf("Valid_CSV_Name_%d", i), func(t *testing.T) { + tempDir := t.TempDir() + + temp, err := os.Create(filepath.Join(tempDir, name+".csv")) + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + _, err = temp.WriteString(gradeLoaderTestCases["Valid_Data"].csvContent) + if err != nil { + t.Errorf("failed to write test data: %v", err) + } + + _, err = loadGrades(tempDir) + if err != nil { + t.Errorf("valid .csv failed: %v", err) + } + }, + ) + } + + t.Run("Real_Data", func(t *testing.T) { + _, err := loadGrades("../grade-data/") + if err != nil { + t.Errorf("failed to load grades: %v", err) + } + }) +} + +func TestCSVToMap(t *testing.T) { + tempDir := t.TempDir() + + for name, testCase := range gradeLoaderTestCases { + t.Run(name, func(t *testing.T) { + + temp, err := os.CreateTemp(tempDir, "grades*.csv") + if err != nil { + t.Errorf("failed to create temp file: %v", err) + } + defer temp.Close() + + if _, err = temp.WriteString(testCase.csvContent); err != nil { + t.Errorf("failed to write test data: %v", err) + } + + output, err := csvToMap(temp.Name()) + if err != nil { + if testCase.fail { + return + } + t.Errorf("failed to load csv: %v", err) + } else if testCase.fail { + t.Errorf("expected failure but got none") + } else { + diff := cmp.Diff(testCase.want, output) + if diff != "" { + t.Errorf("Failed (-expected +got)\n %s", diff) + } + } + + }) + } +} diff --git a/parser/parser.go b/parser/parser.go index 2b7049e..1dd2241 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -40,17 +40,25 @@ var ( timeLocation, timeError = time.LoadLocation("America/Chicago") ) +func init() { + if timeError != nil { + log.Fatalf("Failed to initialize timeLocation: %v", timeError) + } +} + // Parse loads scraped course artifacts, applies parsing and validation, and persists structured results. func Parse(inDir string, outDir string, csvPath string, skipValidation bool) { - // Panic if timeLocation didn't load properly - if timeError != nil { - panic(timeError) - } + if csvPath == "" { + log.Print("No grade data CSV directory specified. Grade data will not be included.") + } else { + var err error + GradeMap, err = loadGrades(csvPath) - // Load grade data from csv in advance - GradeMap = loadGrades(csvPath) - if len(GradeMap) != 0 { + if err != nil { + log.Fatalf("Failed to load grade data: %v", err) + return + } log.Printf("Loaded grade distributions for %d semesters.", len(GradeMap)) } diff --git a/parser/parser_test.go b/parser/parser_test.go index cee8873..41e7af4 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -34,7 +34,7 @@ type TestData struct { // testData global dictionary containing the data from /testdata by folder name var testData map[string]TestData -// TestMain loads parser fixtures and handles the -update flag for regenerating expectations. +// TestMain loads parser fixtures and handles the `-update` flag for regenerating expectations. func TestMain(m *testing.M) { update := flag.Bool("update", false, "Regenerates the expected output for the provided test inputs. Should only be used when you are 100% sure your code is correct! It will make all test pass :)") @@ -132,10 +132,13 @@ func updateTestData() error { } defer os.RemoveAll(tempDir) - //Fill temp dir with all the test cases and expected values + GradeMap, err = loadGrades("../grade-data") + if err != nil { + return err + } + //Fill temp dir with all the test cases and expected values duplicates := make(map[string]bool) - for i, input := range utils.GetAllFilesWithExtension("testdata", ".html") { parse(input) @@ -214,20 +217,66 @@ func updateTestData() error { //rerun parser to get Courses.json, Sections.json, Professors.json - //Parse(tempDir, tempDir, "../grade-data", false) - //Grade data isn't work with tests currently - Parse(tempDir, tempDir, "", false) + Parse(tempDir, tempDir, "../grade-data", false) + + targetDir := "testdata" + + err = filepath.Walk(tempDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(tempDir, path) + if err != nil { + return err + } + destPath := filepath.Join(targetDir, relPath) + + if info.IsDir() { + return os.MkdirAll(destPath, 0755) + } + + newContent, err := os.ReadFile(path) + if err != nil { + return err + } + + if existingContent, err := os.ReadFile(destPath); err == nil { + if bytes.Equal(newContent, existingContent) { + return nil + } + } + + log.Printf("Updating file: %s", destPath) + return os.WriteFile(destPath, newContent, 0644) + }) - //overwrite the current test data with the new data - if err := os.RemoveAll("testdata"); err != nil { - return fmt.Errorf("failed to remove testdata: %v", err) + if err != nil { + return fmt.Errorf("failed to sync test data: %v", err) } - if err := os.CopyFS("testdata", os.DirFS(tempDir)); err != nil { - return fmt.Errorf("failed to copy testdata: %v", err) + err = filepath.Walk(targetDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + relPath, err := filepath.Rel(targetDir, path) + if err != nil { + return err + } + + srcPath := filepath.Join(tempDir, relPath) + if _, err := os.Stat(srcPath); os.IsNotExist(err) { + log.Printf("Removing stale file: %s", path) + return os.RemoveAll(path) + } + return nil + }) + + if err != nil { + return fmt.Errorf("failed to cleanup stale data: %v", err) } - //reset maps to avoid side effects. maybe parser should be an object? clearGlobals() return nil } @@ -244,8 +293,8 @@ func clearGlobals() { // TestParse verifies that parsing input fixtures generates the expected JSON exports. func TestParse(t *testing.T) { tempDir := t.TempDir() - // todo fix grade data, csvPath = ./grade-data panics - Parse("testdata", tempDir, "", false) + + Parse("testdata", tempDir, "../grade-data", false) OutputCourses, err := unmarshallFile[[]schema.Course](filepath.Join(tempDir, "courses.json")) if err != nil { diff --git a/parser/testdata/case_000/course.json b/parser/testdata/case_000/course.json index 5e342be..3f26745 100644 --- a/parser/testdata/case_000/course.json +++ b/parser/testdata/case_000/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23bd7", + "_id": "69cda9299bb0b8cd4cf4c71c", "subject_prefix": "ACCT", "course_number": "2301", "title": "Introductory Financial Accounting", - "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", + "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", "enrollment_reqs": "ACCT 2301 Repeat Restriction", "school": "Naveen Jindal School of Management", "credit_hours": "3", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "69cda9299bb0b8cd4cf4c71d" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_000/professors.json b/parser/testdata/case_000/professors.json index 207c908..b3436be 100644 --- a/parser/testdata/case_000/professors.json +++ b/parser/testdata/case_000/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23bd9", + "_id": "69cda9299bb0b8cd4cf4c71e", "first_name": "Naim Bugra", "last_name": "Ozel", "titles": [ @@ -17,11 +17,11 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "69cda9299bb0b8cd4cf4c71d" ] }, { - "_id": "67d07ee0c972c18731e23bda", + "_id": "69cda9299bb0b8cd4cf4c71f", "first_name": "Jieying", "last_name": "Zhang", "titles": [ @@ -38,7 +38,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bd8" + "69cda9299bb0b8cd4cf4c71d" ] } ] diff --git a/parser/testdata/case_000/section.json b/parser/testdata/case_000/section.json index a67c0f5..adf994c 100644 --- a/parser/testdata/case_000/section.json +++ b/parser/testdata/case_000/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23bd8", + "_id": "69cda9299bb0b8cd4cf4c71d", "section_number": "003", - "course_reference": "67d07ee0c972c18731e23bd7", + "course_reference": "69cda9299bb0b8cd4cf4c71c", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,8 +9,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bd9", - "67d07ee0c972c18731e23bda" + "69cda9299bb0b8cd4cf4c71e", + "69cda9299bb0b8cd4cf4c71f" ], "teaching_assistants": [ { @@ -48,6 +48,25 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152555", - "grade_distribution": [], + "grade_distribution": [ + 9, + 9, + 4, + 6, + 4, + 5, + 12, + 3, + 1, + 3, + 1, + 0, + 4, + 3, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_001/course.json b/parser/testdata/case_001/course.json index 24dcf8b..fd0a985 100644 --- a/parser/testdata/case_001/course.json +++ b/parser/testdata/case_001/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23bdb", + "_id": "69cda9299bb0b8cd4cf4c720", "subject_prefix": "ACCT", "course_number": "2301", "title": "Introductory Financial Accounting", - "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", + "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", "enrollment_reqs": "ACCT 2301 Repeat Restriction", "school": "Naveen Jindal School of Management", "credit_hours": "3", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "69cda9299bb0b8cd4cf4c721" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_001/professors.json b/parser/testdata/case_001/professors.json index 8bf8c6b..1231428 100644 --- a/parser/testdata/case_001/professors.json +++ b/parser/testdata/case_001/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23bdd", + "_id": "69cda9299bb0b8cd4cf4c722", "first_name": "Jieying", "last_name": "Zhang", "titles": [ @@ -17,11 +17,11 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "69cda9299bb0b8cd4cf4c721" ] }, { - "_id": "67d07ee0c972c18731e23bde", + "_id": "69cda9299bb0b8cd4cf4c723", "first_name": "Naim Bugra", "last_name": "Ozel", "titles": [ @@ -38,7 +38,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bdc" + "69cda9299bb0b8cd4cf4c721" ] } ] diff --git a/parser/testdata/case_001/section.json b/parser/testdata/case_001/section.json index eeb9360..a5039fd 100644 --- a/parser/testdata/case_001/section.json +++ b/parser/testdata/case_001/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23bdc", + "_id": "69cda9299bb0b8cd4cf4c721", "section_number": "001", - "course_reference": "67d07ee0c972c18731e23bdb", + "course_reference": "69cda9299bb0b8cd4cf4c720", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,8 +9,8 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bdd", - "67d07ee0c972c18731e23bde" + "69cda9299bb0b8cd4cf4c722", + "69cda9299bb0b8cd4cf4c723" ], "teaching_assistants": [ { @@ -48,6 +48,25 @@ ], "core_flags": [], "syllabus_uri": "https://dox.utdallas.edu/syl152552", - "grade_distribution": [], + "grade_distribution": [ + 2, + 7, + 5, + 8, + 8, + 7, + 3, + 6, + 1, + 3, + 3, + 0, + 9, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_002/course.json b/parser/testdata/case_002/course.json index 141ff6b..8693456 100644 --- a/parser/testdata/case_002/course.json +++ b/parser/testdata/case_002/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23bdf", + "_id": "69cda9299bb0b8cd4cf4c724", "subject_prefix": "BA", "course_number": "1320", "title": "Business in a Global World", - "description": "BA 1320 - Business in a Global World (3 semester credit hours) This course provides a primer on the impacts of globalization on business. We equip students with the basic facts of globalization and examine the business underpinnings and the institutions that shape globalization. We discuss major trends and the future of international management. The aim is an ability to think strategically and critically about global business issues. (3-0) S", + "description": "BA 1320 - Business in a Global World (3 semester credit hours) This course provides a primer on the impacts of globalization on business. We equip students with the basic facts of globalization and examine the business underpinnings and the institutions that shape globalization. We discuss major trends and the future of international management. The aim is an ability to think strategically and critically about global business issues. (3-0) S", "enrollment_reqs": "BA 1320 Repeat Restriction", "school": "Naveen Jindal School of Management", "credit_hours": "3", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be0" + "69cda9299bb0b8cd4cf4c725" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_002/professors.json b/parser/testdata/case_002/professors.json index c6913f6..0316d96 100644 --- a/parser/testdata/case_002/professors.json +++ b/parser/testdata/case_002/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23be1", + "_id": "69cda9299bb0b8cd4cf4c726", "first_name": "Peter", "last_name": "Lewin", "titles": [ @@ -17,7 +17,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23be0" + "69cda9299bb0b8cd4cf4c725" ] } ] diff --git a/parser/testdata/case_002/section.json b/parser/testdata/case_002/section.json index 6eb44f5..e28a111 100644 --- a/parser/testdata/case_002/section.json +++ b/parser/testdata/case_002/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be0", + "_id": "69cda9299bb0b8cd4cf4c725", "section_number": "501", - "course_reference": "67d07ee0c972c18731e23bdf", + "course_reference": "69cda9299bb0b8cd4cf4c724", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,7 +9,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23be1" + "69cda9299bb0b8cd4cf4c726" ], "teaching_assistants": [ { @@ -44,6 +44,25 @@ "090" ], "syllabus_uri": "https://dox.utdallas.edu/syl153033", - "grade_distribution": [], + "grade_distribution": [ + 0, + 13, + 23, + 6, + 4, + 7, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null } diff --git a/parser/testdata/case_003/course.json b/parser/testdata/case_003/course.json index 94219f8..308850d 100644 --- a/parser/testdata/case_003/course.json +++ b/parser/testdata/case_003/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23be2", + "_id": "69cda9299bb0b8cd4cf4c727", "subject_prefix": "BIOL", "course_number": "6111", "title": "Graduate Research Presentation", - "description": "BIOL 6111 - Graduate Research Presentation (1 semester credit hour) This course will train graduate students (MS and PhD) in hypothesis building and testing, designing, and conducting experiments, and presenting scientific findings in an efficient and clear manner. During the class, graduate students will discuss and present their graduate research work-in-progress. Significant time outside of class will also be required to analyze data, assemble, and practice presentations. May be repeated for credit as topics vary (2 semester credit hours maximum). Department consent required. (1-0) S", + "description": "BIOL 6111 - Graduate Research Presentation (1 semester credit hour) This course will train graduate students (MS and PhD) in hypothesis building and testing, designing, and conducting experiments, and presenting scientific findings in an efficient and clear manner. During the class, graduate students will discuss and present their graduate research work-in-progress. Significant time outside of class will also be required to analyze data, assemble, and practice presentations. May be repeated for credit as topics vary (2 semester credit hours maximum). Department consent required. (1-0) S", "enrollment_reqs": "", "school": "School of Natural Sciences and Mathematics", "credit_hours": "1", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be3" + "69cda9299bb0b8cd4cf4c728" ], "lecture_contact_hours": "1", "laboratory_contact_hours": "0", diff --git a/parser/testdata/case_003/professors.json b/parser/testdata/case_003/professors.json index 3cb4a51..9913649 100644 --- a/parser/testdata/case_003/professors.json +++ b/parser/testdata/case_003/professors.json @@ -1,6 +1,6 @@ [ { - "_id": "67d07ee0c972c18731e23be4", + "_id": "69cda9299bb0b8cd4cf4c729", "first_name": "Tian", "last_name": "Hong", "titles": [ @@ -17,7 +17,7 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23be3" + "69cda9299bb0b8cd4cf4c728" ] } ] diff --git a/parser/testdata/case_003/section.json b/parser/testdata/case_003/section.json index fff4105..93541b3 100644 --- a/parser/testdata/case_003/section.json +++ b/parser/testdata/case_003/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be3", + "_id": "69cda9299bb0b8cd4cf4c728", "section_number": "016", - "course_reference": "67d07ee0c972c18731e23be2", + "course_reference": "69cda9299bb0b8cd4cf4c727", "section_corequisites": null, "academic_session": { "name": "25S", @@ -9,7 +9,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23be4" + "69cda9299bb0b8cd4cf4c729" ], "teaching_assistants": [], "internal_class_number": "29611", diff --git a/parser/testdata/case_004/course.json b/parser/testdata/case_004/course.json index d8c5383..b5431c7 100644 --- a/parser/testdata/case_004/course.json +++ b/parser/testdata/case_004/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23be5", + "_id": "69cda9299bb0b8cd4cf4c72a", "subject_prefix": "AERO", "course_number": "3320", "title": "- Recitation", - "description": "- ()", + "description": "- ()", "enrollment_reqs": "", "school": "Undergraduate Studies", "credit_hours": "Non-Enroll", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be6" + "69cda9299bb0b8cd4cf4c72b" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", diff --git a/parser/testdata/case_004/section.json b/parser/testdata/case_004/section.json index 2481524..bdc3217 100644 --- a/parser/testdata/case_004/section.json +++ b/parser/testdata/case_004/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be6", + "_id": "69cda9299bb0b8cd4cf4c72b", "section_number": "201", - "course_reference": "67d07ee0c972c18731e23be5", + "course_reference": "69cda9299bb0b8cd4cf4c72a", "section_corequisites": null, "academic_session": { "name": "25S", diff --git a/parser/testdata/case_005/course.json b/parser/testdata/case_005/course.json index 9095afc..9116fdb 100644 --- a/parser/testdata/case_005/course.json +++ b/parser/testdata/case_005/course.json @@ -1,9 +1,9 @@ { - "_id": "67d07ee0c972c18731e23be7", + "_id": "69cda9299bb0b8cd4cf4c72c", "subject_prefix": "AERO", "course_number": "4320", "title": "- Laboratory", - "description": "- ()", + "description": "- ()", "enrollment_reqs": "", "school": "Undergraduate Studies", "credit_hours": "4", @@ -15,7 +15,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23be8" + "69cda9299bb0b8cd4cf4c72d" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", diff --git a/parser/testdata/case_005/section.json b/parser/testdata/case_005/section.json index 712c972..d89794b 100644 --- a/parser/testdata/case_005/section.json +++ b/parser/testdata/case_005/section.json @@ -1,7 +1,7 @@ { - "_id": "67d07ee0c972c18731e23be8", + "_id": "69cda9299bb0b8cd4cf4c72d", "section_number": "002", - "course_reference": "67d07ee0c972c18731e23be7", + "course_reference": "69cda9299bb0b8cd4cf4c72c", "section_corequisites": null, "academic_session": { "name": "25S", diff --git a/parser/testdata/case_006/classInfo.json b/parser/testdata/case_006/classInfo.json new file mode 100644 index 0000000..34e725a --- /dev/null +++ b/parser/testdata/case_006/classInfo.json @@ -0,0 +1,14 @@ +{ + "": "", + "Activity Type:": "Lecture", + "Add Consent:": "No Consent", + "Class Level:": "Undergraduate", + "Class Section:": "THEA1310.001.25S", + "Class/Course Number:": "24043 / 003909", + "Grading:": "Graded - Undergraduate", + "How often a course is scheduled:": "Once Each Long Semester", + "Instruction Mode:": "Face-to-Face", + "Orion Date/Time:": "2025-08-28 06:30:01", + "Semester Credit Hours:": "3", + "Session Type:": "Regular Academic Session" +} diff --git a/parser/testdata/case_006/course.json b/parser/testdata/case_006/course.json new file mode 100644 index 0000000..e11f4c8 --- /dev/null +++ b/parser/testdata/case_006/course.json @@ -0,0 +1,25 @@ +{ + "_id": "69cda9299bb0b8cd4cf4c72e", + "subject_prefix": "THEA", + "course_number": "1310", + "title": "Understanding Theatre", + "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", + "enrollment_reqs": "", + "school": "School of Arts, Humanities, and Technology", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "003909", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "69cda9299bb0b8cd4cf4c72f" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null +} diff --git a/parser/testdata/case_006/input.html b/parser/testdata/case_006/input.html new file mode 100644 index 0000000..e4efe97 --- /dev/null +++ b/parser/testdata/case_006/input.html @@ -0,0 +1,268 @@ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Course Title: + + Understanding Theatre +
+ Class Info: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Class Section: + + THEA1310.001.25S + + Instruction Mode: + + Face-to-Face +
+ Class Level: + + Undergraduate + + Activity Type: + + Lecture +
+ Semester Credit Hours: + + 3 + + Class/Course Number: + + 24043 / 003909 +
+ Grading: + + Graded - Undergraduate + + Session Type: + + Regular Academic Session +
+ Add Consent: + + No Consent + + Orion Date/Time: + + 2025-08-28 06:30:01 +
+ How often a course is scheduled: + + Once Each Long Semester + + +
+ +
+ Status: + + Enrollment Status: OPEN    Available Seats: 15    Enrolled + Total: 33    Waitlist: 0 + +
+ Description: + + THEA 1310 - Understanding Theatre (3 + semester credit hours) Lectures, discussions, and performances designed to explore artistic, + philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics + may include analysis of scripts, the nature of the theater compared to the other performing arts, + and the nature of popular entertainments. (3-0) S + +
+ Instructor(s): + +
+
Damian Enyaosah ・ Primary Instructor ・ dpe190000@utdallas.edu +
+ +
TA/RA(s):(none)
+ Schedule: + +
+

Class Location and Times

+

Term: 25S
Type: Regular Academic Session
Starts: + January 21, 2025
Ends: May 16, 2025

+ +
+

+ January 21, 2025-May 9, 2025
+ Monday, Wednesday
+ 10:00am-11:15am
+ FO 1.202 + +

+
+
FO Building
Floor 1 - Room 1.202
+ +
+
+
+ +
+
+ +
Core:Texas Core Areas 050+090 - Creative Arts + CAO
+ College: + + School of Arts, Humanities, and + Technology + +
+ Syllabus: + + Syllabus for Understanding Theatre + (THEA1310.001.25S) + +
+ Evaluation: + + Class evaluation for + Understanding Theatre (THEA1310.001.25S) + +
+
+
The direct link to this class is: https://go.utdallas.edu/thea1310.001.25s
+
+ Register for this class on Orion: https://orion.utdallas.edu +
+
+ +
diff --git a/parser/testdata/case_006/professors.json b/parser/testdata/case_006/professors.json new file mode 100644 index 0000000..57023a3 --- /dev/null +++ b/parser/testdata/case_006/professors.json @@ -0,0 +1,23 @@ +[ + { + "_id": "69cda9299bb0b8cd4cf4c730", + "first_name": "Damian", + "last_name": "Enyaosah", + "titles": [ + "Primary Instructor" + ], + "email": "dpe190000@utdallas.edu", + "phone_number": "", + "office": { + "building": "", + "room": "", + "map_uri": "" + }, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": [ + "69cda9299bb0b8cd4cf4c72f" + ] + } +] diff --git a/parser/testdata/case_006/section.json b/parser/testdata/case_006/section.json new file mode 100644 index 0000000..6cce905 --- /dev/null +++ b/parser/testdata/case_006/section.json @@ -0,0 +1,61 @@ +{ + "_id": "69cda9299bb0b8cd4cf4c72f", + "section_number": "001", + "course_reference": "69cda9299bb0b8cd4cf4c72e", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "69cda9299bb0b8cd4cf4c730" + ], + "teaching_assistants": [], + "internal_class_number": "24043", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Monday", + "Wednesday" + ], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": { + "building": "FO", + "room": "1.202", + "map_uri": "https://locator.utdallas.edu/FO_1.202" + } + } + ], + "core_flags": [ + "050", + "090" + ], + "syllabus_uri": "https://dox.utdallas.edu/syl154931", + "grade_distribution": [ + 4, + 7, + 9, + 3, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 0, + 5, + 0, + 0, + 0, + 0, + 0 + ], + "attributes": null +} diff --git a/parser/testdata/courses.json b/parser/testdata/courses.json index 78ae4d4..2808543 100644 --- a/parser/testdata/courses.json +++ b/parser/testdata/courses.json @@ -1,10 +1,86 @@ [ { - "_id": "67d07ee0c972c18731e23bee", + "_id": "69cda9299bb0b8cd4cf4c73e", + "subject_prefix": "AERO", + "course_number": "4320", + "title": "- Laboratory", + "description": "- ()", + "enrollment_reqs": "", + "school": "Undergraduate Studies", + "credit_hours": "4", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "000255", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "69cda9299bb0b8cd4cf4c73f" + ], + "lecture_contact_hours": "", + "laboratory_contact_hours": "", + "offering_frequency": "", + "catalog_year": "24", + "attributes": null + }, + { + "_id": "69cda9299bb0b8cd4cf4c740", + "subject_prefix": "THEA", + "course_number": "1310", + "title": "Understanding Theatre", + "description": "THEA 1310 - Understanding Theatre (3 semester credit hours) Lectures, discussions, and performances designed to explore artistic, philosophical, social, historical, and psychological dimensions of the theatrical experience. Topics may include analysis of scripts, the nature of the theater compared to the other performing arts, and the nature of popular entertainments. (3-0) S", + "enrollment_reqs": "", + "school": "School of Arts, Humanities, and Technology", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "003909", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "69cda9299bb0b8cd4cf4c741" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null + }, + { + "_id": "69cda9299bb0b8cd4cf4c731", + "subject_prefix": "ACCT", + "course_number": "2301", + "title": "Introductory Financial Accounting", + "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", + "enrollment_reqs": "ACCT 2301 Repeat Restriction", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "000061", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": [ + "69cda9299bb0b8cd4cf4c732", + "69cda9299bb0b8cd4cf4c735" + ], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null + }, + { + "_id": "69cda9299bb0b8cd4cf4c736", "subject_prefix": "BA", "course_number": "1320", "title": "Business in a Global World", - "description": "BA 1320 - Business in a Global World (3 semester credit hours) This course provides a primer on the impacts of globalization on business. We equip students with the basic facts of globalization and examine the business underpinnings and the institutions that shape globalization. We discuss major trends and the future of international management. The aim is an ability to think strategically and critically about global business issues. (3-0) S", + "description": "BA 1320 - Business in a Global World (3 semester credit hours) This course provides a primer on the impacts of globalization on business. We equip students with the basic facts of globalization and examine the business underpinnings and the institutions that shape globalization. We discuss major trends and the future of international management. The aim is an ability to think strategically and critically about global business issues. (3-0) S", "enrollment_reqs": "BA 1320 Repeat Restriction", "school": "Naveen Jindal School of Management", "credit_hours": "3", @@ -16,7 +92,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bef" + "69cda9299bb0b8cd4cf4c737" ], "lecture_contact_hours": "3", "laboratory_contact_hours": "0", @@ -25,11 +101,11 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf1", + "_id": "69cda9299bb0b8cd4cf4c739", "subject_prefix": "BIOL", "course_number": "6111", "title": "Graduate Research Presentation", - "description": "BIOL 6111 - Graduate Research Presentation (1 semester credit hour) This course will train graduate students (MS and PhD) in hypothesis building and testing, designing, and conducting experiments, and presenting scientific findings in an efficient and clear manner. During the class, graduate students will discuss and present their graduate research work-in-progress. Significant time outside of class will also be required to analyze data, assemble, and practice presentations. May be repeated for credit as topics vary (2 semester credit hours maximum). Department consent required. (1-0) S", + "description": "BIOL 6111 - Graduate Research Presentation (1 semester credit hour) This course will train graduate students (MS and PhD) in hypothesis building and testing, designing, and conducting experiments, and presenting scientific findings in an efficient and clear manner. During the class, graduate students will discuss and present their graduate research work-in-progress. Significant time outside of class will also be required to analyze data, assemble, and practice presentations. May be repeated for credit as topics vary (2 semester credit hours maximum). Department consent required. (1-0) S", "enrollment_reqs": "", "school": "School of Natural Sciences and Mathematics", "credit_hours": "1", @@ -41,7 +117,7 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bf2" + "69cda9299bb0b8cd4cf4c73a" ], "lecture_contact_hours": "1", "laboratory_contact_hours": "0", @@ -50,11 +126,11 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf4", + "_id": "69cda9299bb0b8cd4cf4c73c", "subject_prefix": "AERO", "course_number": "3320", "title": "- Recitation", - "description": "- ()", + "description": "- ()", "enrollment_reqs": "", "school": "Undergraduate Studies", "credit_hours": "Non-Enroll", @@ -66,63 +142,12 @@ "corequisites": null, "co_or_pre_requisites": null, "sections": [ - "67d07ee0c972c18731e23bf5" - ], - "lecture_contact_hours": "", - "laboratory_contact_hours": "", - "offering_frequency": "", - "catalog_year": "24", - "attributes": null - }, - { - "_id": "67d07ee0c972c18731e23bf6", - "subject_prefix": "AERO", - "course_number": "4320", - "title": "- Laboratory", - "description": "- ()", - "enrollment_reqs": "", - "school": "Undergraduate Studies", - "credit_hours": "4", - "class_level": "Undergraduate", - "activity_type": "Lecture", - "grading": "Graded - Undergraduate", - "internal_course_number": "000255", - "prerequisites": null, - "corequisites": null, - "co_or_pre_requisites": null, - "sections": [ - "67d07ee0c972c18731e23bf7" + "69cda9299bb0b8cd4cf4c73d" ], "lecture_contact_hours": "", "laboratory_contact_hours": "", "offering_frequency": "", "catalog_year": "24", "attributes": null - }, - { - "_id": "67d07ee0c972c18731e23be9", - "subject_prefix": "ACCT", - "course_number": "2301", - "title": "Introductory Financial Accounting", - "description": "ACCT 2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting designed to create an awareness of the accounting concepts and principles for preparing the three basic financial statements: the income statement, balance sheet, and statement of cash flows. A minimum grade of C is required to take upper-division ACCT courses. (3-0) S", - "enrollment_reqs": "ACCT 2301 Repeat Restriction", - "school": "Naveen Jindal School of Management", - "credit_hours": "3", - "class_level": "Undergraduate", - "activity_type": "Lecture", - "grading": "Graded - Undergraduate", - "internal_course_number": "000061", - "prerequisites": null, - "corequisites": null, - "co_or_pre_requisites": null, - "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" - ], - "lecture_contact_hours": "3", - "laboratory_contact_hours": "0", - "offering_frequency": "S", - "catalog_year": "24", - "attributes": null } ] diff --git a/parser/testdata/professors.json b/parser/testdata/professors.json index 2a931c4..7950af2 100644 --- a/parser/testdata/professors.json +++ b/parser/testdata/professors.json @@ -1,12 +1,12 @@ [ { - "_id": "67d07ee0c972c18731e23beb", - "first_name": "Naim Bugra", - "last_name": "Ozel", + "_id": "69cda9299bb0b8cd4cf4c734", + "first_name": "Jieying", + "last_name": "Zhang", "titles": [ "Primary Instructor (50%)" ], - "email": "nbo150030@utdallas.edu", + "email": "jxz146230@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -17,18 +17,18 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" + "69cda9299bb0b8cd4cf4c732", + "69cda9299bb0b8cd4cf4c735" ] }, { - "_id": "67d07ee0c972c18731e23bec", - "first_name": "Jieying", - "last_name": "Zhang", + "_id": "69cda9299bb0b8cd4cf4c738", + "first_name": "Peter", + "last_name": "Lewin", "titles": [ - "Primary Instructor (50%)" + "Primary Instructor" ], - "email": "jxz146230@utdallas.edu", + "email": "plewin@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -39,18 +39,17 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bea", - "67d07ee0c972c18731e23bed" + "69cda9299bb0b8cd4cf4c737" ] }, { - "_id": "67d07ee0c972c18731e23bf0", - "first_name": "Peter", - "last_name": "Lewin", + "_id": "69cda9299bb0b8cd4cf4c73b", + "first_name": "Tian", + "last_name": "Hong", "titles": [ "Primary Instructor" ], - "email": "plewin@utdallas.edu", + "email": "txh240018@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -61,17 +60,38 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bef" + "69cda9299bb0b8cd4cf4c73a" ] }, { - "_id": "67d07ee0c972c18731e23bf3", - "first_name": "Tian", - "last_name": "Hong", + "_id": "69cda9299bb0b8cd4cf4c742", + "first_name": "Damian", + "last_name": "Enyaosah", "titles": [ "Primary Instructor" ], - "email": "txh240018@utdallas.edu", + "email": "dpe190000@utdallas.edu", + "phone_number": "", + "office": { + "building": "", + "room": "", + "map_uri": "" + }, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": [ + "69cda9299bb0b8cd4cf4c741" + ] + }, + { + "_id": "69cda9299bb0b8cd4cf4c733", + "first_name": "Naim Bugra", + "last_name": "Ozel", + "titles": [ + "Primary Instructor (50%)" + ], + "email": "nbo150030@utdallas.edu", "phone_number": "", "office": { "building": "", @@ -82,7 +102,8 @@ "image_uri": "", "office_hours": null, "sections": [ - "67d07ee0c972c18731e23bf2" + "69cda9299bb0b8cd4cf4c732", + "69cda9299bb0b8cd4cf4c735" ] } ] diff --git a/parser/testdata/sections.json b/parser/testdata/sections.json index 92319de..43a472c 100644 --- a/parser/testdata/sections.json +++ b/parser/testdata/sections.json @@ -1,8 +1,152 @@ [ { - "_id": "67d07ee0c972c18731e23bef", + "_id": "69cda9299bb0b8cd4cf4c732", + "section_number": "003", + "course_reference": "69cda9299bb0b8cd4cf4c731", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "69cda9299bb0b8cd4cf4c733", + "69cda9299bb0b8cd4cf4c734" + ], + "teaching_assistants": [ + { + "first_name": "Galymzhan", + "last_name": "Tazhibayev", + "role": "Teaching Assistant", + "email": "gxt230023@utdallas.edu" + }, + { + "first_name": "Dipta", + "last_name": "Banik", + "role": "Teaching Assistant", + "email": "dxb220047@utdallas.edu" + } + ], + "internal_class_number": "27706", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Tuesday", + "Thursday" + ], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": { + "building": "JSOM", + "room": "2.717", + "map_uri": "https://locator.utdallas.edu/SOM_2.717" + } + } + ], + "core_flags": [], + "syllabus_uri": "https://dox.utdallas.edu/syl152555", + "grade_distribution": [ + 9, + 9, + 4, + 6, + 4, + 5, + 12, + 3, + 1, + 3, + 1, + 0, + 4, + 3, + 0, + 0, + 0, + 0 + ], + "attributes": null + }, + { + "_id": "69cda9299bb0b8cd4cf4c735", + "section_number": "001", + "course_reference": "69cda9299bb0b8cd4cf4c731", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": [ + "69cda9299bb0b8cd4cf4c734", + "69cda9299bb0b8cd4cf4c733" + ], + "teaching_assistants": [ + { + "first_name": "Galymzhan", + "last_name": "Tazhibayev", + "role": "Teaching Assistant", + "email": "gxt230023@utdallas.edu" + }, + { + "first_name": "Dipta", + "last_name": "Banik", + "role": "Teaching Assistant", + "email": "dxb220047@utdallas.edu" + } + ], + "internal_class_number": "26595", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": [ + "Tuesday", + "Thursday" + ], + "start_time": "8:30am", + "end_time": "9:45am", + "modality": "", + "location": { + "building": "JSOM", + "room": "2.717", + "map_uri": "https://locator.utdallas.edu/SOM_2.717" + } + } + ], + "core_flags": [], + "syllabus_uri": "https://dox.utdallas.edu/syl152552", + "grade_distribution": [ + 2, + 7, + 5, + 8, + 8, + 7, + 3, + 6, + 1, + 3, + 3, + 0, + 9, + 1, + 0, + 0, + 0, + 0 + ], + "attributes": null + }, + { + "_id": "69cda9299bb0b8cd4cf4c737", "section_number": "501", - "course_reference": "67d07ee0c972c18731e23bee", + "course_reference": "69cda9299bb0b8cd4cf4c736", "section_corequisites": null, "academic_session": { "name": "25S", @@ -10,7 +154,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bf0" + "69cda9299bb0b8cd4cf4c738" ], "teaching_assistants": [ { @@ -45,13 +189,32 @@ "090" ], "syllabus_uri": "https://dox.utdallas.edu/syl153033", - "grade_distribution": [], + "grade_distribution": [ + 0, + 13, + 23, + 6, + 4, + 7, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0 + ], "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf2", + "_id": "69cda9299bb0b8cd4cf4c73a", "section_number": "016", - "course_reference": "67d07ee0c972c18731e23bf1", + "course_reference": "69cda9299bb0b8cd4cf4c739", "section_corequisites": null, "academic_session": { "name": "25S", @@ -59,7 +222,7 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23bf3" + "69cda9299bb0b8cd4cf4c73b" ], "teaching_assistants": [], "internal_class_number": "29611", @@ -87,9 +250,9 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf5", + "_id": "69cda9299bb0b8cd4cf4c73d", "section_number": "201", - "course_reference": "67d07ee0c972c18731e23bf4", + "course_reference": "69cda9299bb0b8cd4cf4c73c", "section_corequisites": null, "academic_session": { "name": "25S", @@ -123,9 +286,9 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bf7", + "_id": "69cda9299bb0b8cd4cf4c73f", "section_number": "002", - "course_reference": "67d07ee0c972c18731e23bf6", + "course_reference": "69cda9299bb0b8cd4cf4c73e", "section_corequisites": null, "academic_session": { "name": "25S", @@ -174,9 +337,9 @@ "attributes": null }, { - "_id": "67d07ee0c972c18731e23bea", - "section_number": "003", - "course_reference": "67d07ee0c972c18731e23be9", + "_id": "69cda9299bb0b8cd4cf4c741", + "section_number": "001", + "course_reference": "69cda9299bb0b8cd4cf4c740", "section_corequisites": null, "academic_session": { "name": "25S", @@ -184,99 +347,54 @@ "end_date": "2025-05-16T00:00:00-05:00" }, "professors": [ - "67d07ee0c972c18731e23beb", - "67d07ee0c972c18731e23bec" - ], - "teaching_assistants": [ - { - "first_name": "Galymzhan", - "last_name": "Tazhibayev", - "role": "Teaching Assistant", - "email": "gxt230023@utdallas.edu" - }, - { - "first_name": "Dipta", - "last_name": "Banik", - "role": "Teaching Assistant", - "email": "dxb220047@utdallas.edu" - } + "69cda9299bb0b8cd4cf4c742" ], - "internal_class_number": "27706", + "teaching_assistants": [], + "internal_class_number": "24043", "instruction_mode": "Face-to-Face", "meetings": [ { "start_date": "2025-01-21T00:00:00-06:00", "end_date": "2025-05-09T00:00:00-05:00", "meeting_days": [ - "Tuesday", - "Thursday" + "Monday", + "Wednesday" ], "start_time": "10:00am", "end_time": "11:15am", "modality": "", "location": { - "building": "JSOM", - "room": "2.717", - "map_uri": "https://locator.utdallas.edu/SOM_2.717" + "building": "FO", + "room": "1.202", + "map_uri": "https://locator.utdallas.edu/FO_1.202" } } ], - "core_flags": [], - "syllabus_uri": "https://dox.utdallas.edu/syl152555", - "grade_distribution": [], - "attributes": null - }, - { - "_id": "67d07ee0c972c18731e23bed", - "section_number": "001", - "course_reference": "67d07ee0c972c18731e23be9", - "section_corequisites": null, - "academic_session": { - "name": "25S", - "start_date": "2025-01-21T00:00:00-06:00", - "end_date": "2025-05-16T00:00:00-05:00" - }, - "professors": [ - "67d07ee0c972c18731e23bec", - "67d07ee0c972c18731e23beb" + "core_flags": [ + "050", + "090" ], - "teaching_assistants": [ - { - "first_name": "Galymzhan", - "last_name": "Tazhibayev", - "role": "Teaching Assistant", - "email": "gxt230023@utdallas.edu" - }, - { - "first_name": "Dipta", - "last_name": "Banik", - "role": "Teaching Assistant", - "email": "dxb220047@utdallas.edu" - } + "syllabus_uri": "https://dox.utdallas.edu/syl154931", + "grade_distribution": [ + 4, + 7, + 9, + 3, + 1, + 1, + 0, + 1, + 2, + 0, + 0, + 0, + 5, + 0, + 0, + 0, + 0, + 0 ], - "internal_class_number": "26595", - "instruction_mode": "Face-to-Face", - "meetings": [ - { - "start_date": "2025-01-21T00:00:00-06:00", - "end_date": "2025-05-09T00:00:00-05:00", - "meeting_days": [ - "Tuesday", - "Thursday" - ], - "start_time": "8:30am", - "end_time": "9:45am", - "modality": "", - "location": { - "building": "JSOM", - "room": "2.717", - "map_uri": "https://locator.utdallas.edu/SOM_2.717" - } - } - ], - "core_flags": [], - "syllabus_uri": "https://dox.utdallas.edu/syl152552", - "grade_distribution": [], "attributes": null } ] diff --git a/parser/validator_test.go b/parser/validator_test.go index 3e40c2d..e6a6013 100644 --- a/parser/validator_test.go +++ b/parser/validator_test.go @@ -52,8 +52,15 @@ func init() { panic(err) } - // The correct mapping - indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} + courseIndex := make(map[primitive.ObjectID]int) + for i, course := range testCourses { + courseIndex[course.Id] = i + } + + indexMap = make(map[int]int, len(testSections)) + for i, section := range testSections { + indexMap[i] = courseIndex[section.Course_reference] + } } // TestDuplicateCoursesFail expects duplicates to trigger validation panic. @@ -257,11 +264,11 @@ func TestSectionReferenceProfFail(t *testing.T) { logOutput := logBuffer.String() for _, msg := range []string{ - "Nonexistent professor reference found for section ID ObjectID(\"67d07ee0c972c18731e23bea\")!", - "Referenced professor ID: ObjectID(\"67d07ee0c972c18731e23beb\")", + "Nonexistent professor reference found for section ID ObjectID(\"69cda9299bb0b8cd4cf4c732\")!", + "Referenced professor ID: ObjectID(\"69cda9299bb0b8cd4cf4c734\")", } { if !strings.Contains(logOutput, msg) { - t.Errorf("The function didn't log correct message. Expected \"%v\"", msg) + t.Errorf("The function didn't log correct message. \nExpected \"%v\"\nGot \"%v\"", msg, logOutput) } } diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index 4f6119c..539b6a8 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -25,7 +25,7 @@ import ( ) var ( - prefixRegex = regexp.MustCompile("cp_[a-z]{0,5}") + prefixRegex = regexp.MustCompile("^cp_[a-z]{1,5}$") termRegex = regexp.MustCompile("[0-9]{1,2}[sfu]") ) diff --git a/utils/methods.go b/utils/methods.go index 90712e2..70b8bfe 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -224,8 +224,8 @@ func WriteJSON(filepath string, data interface{}) error { defer fptr.Close() encoder := json.NewEncoder(fptr) encoder.SetIndent("", "\t") - encoder.Encode(data) - return nil + encoder.SetEscapeHTML(false) + return encoder.Encode(data) } // GetAllFilesWithExtension recursively gathers file paths within inDir that match extension.