diff --git a/.github/workflows/crawl_recent_tj.yml b/.github/workflows/crawl_recent_tj.yml
index 8e61263..94ce3c0 100644
--- a/.github/workflows/crawl_recent_tj.yml
+++ b/.github/workflows/crawl_recent_tj.yml
@@ -33,6 +33,6 @@ jobs:
echo "SUPABASE_URL=${{ secrets.SUPABASE_URL }}" >> .env
echo "SUPABASE_KEY=${{ secrets.SUPABASE_KEY }}" >> .env
- - name: run crawl script
+ - name: run crawl script - crawlRecentTJ.ts
working-directory: packages/crawling
run: pnpm run recent-tj
diff --git a/.github/workflows/tagging_song.yml b/.github/workflows/tagging_song.yml
new file mode 100644
index 0000000..10b5111
--- /dev/null
+++ b/.github/workflows/tagging_song.yml
@@ -0,0 +1,43 @@
+name: Tagging Songs
+
+on:
+ schedule:
+ - cron: "0 14 * * *" # 한국 시간 23:00 실행 (UTC+9 → UTC 14:00)
+ workflow_dispatch:
+
+permissions:
+ contents: write # push 권한을 위해 필요
+
+jobs:
+ run-npm-task:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout branch
+ uses: actions/checkout@v4
+
+ - name: Use Node.js 20
+ uses: actions/setup-node@v4
+ with:
+ node-version: "20"
+
+ - name: Install pnpm
+ uses: pnpm/action-setup@v2
+ with:
+ version: 9
+ run_install: false
+
+ - name: Install dependencies
+ working-directory: packages/crawling
+ run: pnpm install
+
+ - name: Create .env file
+ working-directory: packages/crawling
+ run: |
+ echo "SUPABASE_URL=${{ secrets.SUPABASE_URL }}" >> .env
+ echo "SUPABASE_KEY=${{ secrets.SUPABASE_KEY }}" >> .env
+ echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
+
+ - name: run tagging script - taggingSongs.ts
+ working-directory: packages/crawling
+ run: pnpm run tag-songs
diff --git a/.github/workflows/update_ky_youtube.yml b/.github/workflows/update_ky_youtube.yml
index 83ddbe7..4354cfb 100644
--- a/.github/workflows/update_ky_youtube.yml
+++ b/.github/workflows/update_ky_youtube.yml
@@ -38,6 +38,6 @@ jobs:
echo "SUPABASE_KEY=${{ secrets.SUPABASE_KEY }}" >> .env
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
- - name: run update script - packages/crawling/crawlYoutube.ts
+ - name: run update script - crawlYoutube.ts
working-directory: packages/crawling
run: pnpm run ky-youtube
diff --git a/.github/workflows/verify_ky_youtube.yml b/.github/workflows/verify_ky_youtube.yml
index 2d9f4fe..e65f7e8 100644
--- a/.github/workflows/verify_ky_youtube.yml
+++ b/.github/workflows/verify_ky_youtube.yml
@@ -38,6 +38,6 @@ jobs:
echo "SUPABASE_KEY=${{ secrets.SUPABASE_KEY }}" >> .env
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
- - name: run verify script - packages/crawling
+ - name: run verify script - crawlYoutubeVerify.ts
working-directory: packages/crawling
run: pnpm run ky-verify
diff --git a/CLAUDE.md b/CLAUDE.md
index ad72a5a..c5e5abb 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -44,7 +44,7 @@ packages/
eslint-config/ — Shared ESLint config (@repo/eslint-config)
format-config/ — Shared Prettier config (@repo/format-config)
typescript-config/ — Shared tsconfig bases
- crawling/ — One-off data crawling scripts (not a published package)
+ crawling/ — Data crawling & tagging scripts (see packages/crawling/CLAUDE.md)
```
## Web App Architecture
diff --git a/apps/web/public/sitemap-0.xml b/apps/web/public/sitemap-0.xml
index 73fd5f7..c905ea8 100644
--- a/apps/web/public/sitemap-0.xml
+++ b/apps/web/public/sitemap-0.xml
@@ -1,4 +1,4 @@
-https://www.singcode.kr2026-03-25T14:32:28.966Zweekly0.7
+https://www.singcode.kr2026-03-27T14:29:45.638Zweekly0.7
\ No newline at end of file
diff --git a/apps/web/src/app/api/search/route.ts b/apps/web/src/app/api/search/route.ts
index 7b86bd3..1cb66f5 100644
--- a/apps/web/src/app/api/search/route.ts
+++ b/apps/web/src/app/api/search/route.ts
@@ -6,9 +6,11 @@ import { SearchSong, Song } from '@/types/song';
import { getAuthenticatedUser } from '@/utils/getAuthenticatedUser';
interface DBSong extends Song {
- thumb_logs: {
- thumb_count: number;
- }[] | null;
+ thumb_logs:
+ | {
+ thumb_count: number;
+ }[]
+ | null;
tosings: {
user_id: string;
}[];
diff --git a/apps/web/src/app/api/songs/thumb-up/route.ts b/apps/web/src/app/api/songs/thumb-up/route.ts
index d8a2061..db5d6a6 100644
--- a/apps/web/src/app/api/songs/thumb-up/route.ts
+++ b/apps/web/src/app/api/songs/thumb-up/route.ts
@@ -30,9 +30,7 @@ export async function GET(): Promise>> {
}
// 3) 상위 50개 song_id 추출
- const sorted = [...thumbMap.entries()]
- .sort((a, b) => b[1] - a[1])
- .slice(0, 50);
+ const sorted = [...thumbMap.entries()].sort((a, b) => b[1] - a[1]).slice(0, 50);
const songIds = sorted.map(([songId]) => songId);
diff --git a/packages/crawling/CLAUDE.md b/packages/crawling/CLAUDE.md
index 7136ffc..3bdf7d0 100644
--- a/packages/crawling/CLAUDE.md
+++ b/packages/crawling/CLAUDE.md
@@ -13,6 +13,8 @@ pnpm ky-open # Open API(금영)로 KY 번호 수집
pnpm ky-youtube # YouTube 크롤링으로 KY 번호 수집 + AI 검증
pnpm ky-verify # 기존 KY 번호의 실제 존재 여부 재검증 (체크포인트 지원)
pnpm ky-update # ky-youtube + ky-verify 병렬 실행
+pnpm recent-tj # TJ 최신곡 크롤링
+pnpm tag-songs # AI 기반 곡 자동 태깅
pnpm test # vitest 실행
pnpm lint # ESLint
```
@@ -94,8 +96,33 @@ findKYByOpen.ts
| ------------------ | -------------------------------- |
| `songs` | 메인 곡 데이터 (TJ/KY 번호 포함) |
| `invalid_ky_songs` | KY 번호 수집 실패 목록 |
+| `tags` | 태그 마스터 (id, name, category) |
+| `song_tags` | 곡-태그 매핑 (song_id, tag_id) |
+| `verify_ky_songs` | KY 번호 검증 완료 목록 |
### AI 유틸
- `utils/validateSongMatch.ts` — `gpt-4o-mini`로 두 (제목, 아티스트) 쌍이 같은 곡인지 판단. `temperature: 0`, `max_tokens: 20`, 완전 일치 시 API 호출 생략.
- `utils/transChatGPT.ts` — `gpt-4-turbo`로 일본어 → 한국어 번역.
+- `utils/getSongTag.ts` — `gpt-4o-mini`로 곡에 적절한 태그 ID 자동 할당. DB의 `tags` 테이블에서 태그 목록을 캐싱하여 프롬프트에 포함.
+
+### 곡 태깅 파이프라인
+
+```
+taggingSongs.ts
+ └─ getSongsAllDB() # 전체 곡 조회
+ └─ getSongTagSongIdsDB() # 이미 태그된 곡 ID Set 로드 (스킵 처리)
+ └─ autoTagSong(title, artist) # AI로 태그 ID 추출 (1~4개)
+ └─ postSongTagsDB(songId, tagIds) # song_tags 테이블에 insert
+```
+
+### GitHub Actions 워크플로우
+
+| 워크플로우 파일 | 스케줄 (UTC) | 실행 스크립트 |
+| ----------------------- | ------------ | -------------------- |
+| `crawl_recent_tj.yml` | 매일 14:00 | `pnpm recent-tj` |
+| `tagging_song.yml` | 매일 14:00 | `pnpm tag-songs` |
+| `update_ky_youtube.yml` | 수동 | `pnpm ky-youtube` |
+| `verify_ky_youtube.yml` | 수동 | `pnpm ky-verify` |
+
+모든 워크플로우는 `workflow_dispatch`로 수동 실행도 가능하다.
diff --git a/packages/crawling/package.json b/packages/crawling/package.json
index e81f2ff..48a1907 100644
--- a/packages/crawling/package.json
+++ b/packages/crawling/package.json
@@ -8,10 +8,11 @@
},
"scripts": {
"ky-open": "tsx src/findKYByOpen.ts",
- "ky-youtube": "tsx src/crawling/crawlYoutube.ts",
- "ky-verify": "tsx src/crawling/crawlYoutubeVerify.ts",
+ "ky-youtube": "tsx src/cron/crawlYoutube.ts",
+ "ky-verify": "tsx src/cron/crawlYoutubeVerify.ts",
"ky-update": "pnpm run ky-youtube & pnpm run ky-verify",
- "recent-tj": "tsx src/crawling/crawlRecentTJ.ts",
+ "recent-tj": "tsx src/cron/crawlRecentTJ.ts",
+ "tag-songs": "tsx src/cron/taggingSongs.ts",
"lint": "eslint .",
"test": "vitest run",
"format": "prettier --write \"**/*.{ts,tsx,md}\""
diff --git a/packages/crawling/src/crawling/crawlRecentTJ.ts b/packages/crawling/src/cron/crawlRecentTJ.ts
similarity index 100%
rename from packages/crawling/src/crawling/crawlRecentTJ.ts
rename to packages/crawling/src/cron/crawlRecentTJ.ts
diff --git a/packages/crawling/src/crawling/crawlYoutube.ts b/packages/crawling/src/cron/crawlYoutube.ts
similarity index 98%
rename from packages/crawling/src/crawling/crawlYoutube.ts
rename to packages/crawling/src/cron/crawlYoutube.ts
index 9ce215e..39d8d97 100644
--- a/packages/crawling/src/crawling/crawlYoutube.ts
+++ b/packages/crawling/src/cron/crawlYoutube.ts
@@ -6,7 +6,7 @@ import { postInvalidKYSongsDB } from '@/supabase/postDB';
import { updateSongsKyDB } from '@/supabase/updateDB';
import { Song } from '@/types';
-import { isValidKYExistNumber } from './isValidKYExistNumber';
+import { isValidKYExistNumber } from '../crawling/isValidKYExistNumber';
// --- Constants ---
const BASE_YOUTUBE_SEARCH_URL = 'https://www.youtube.com/@KARAOKEKY/search';
diff --git a/packages/crawling/src/crawling/crawlYoutubeVerify.ts b/packages/crawling/src/cron/crawlYoutubeVerify.ts
similarity index 90%
rename from packages/crawling/src/crawling/crawlYoutubeVerify.ts
rename to packages/crawling/src/cron/crawlYoutubeVerify.ts
index d4c6ea9..6dc0ee6 100644
--- a/packages/crawling/src/crawling/crawlYoutubeVerify.ts
+++ b/packages/crawling/src/cron/crawlYoutubeVerify.ts
@@ -4,7 +4,7 @@ import { getSongsKyNotNullDB, getVerifyKySongsDB } from '@/supabase/getDB';
import { postVerifyKySongsDB } from '@/supabase/postDB';
import { updateSongsKyDB } from '@/supabase/updateDB';
-import { isValidKYExistNumber } from './isValidKYExistNumber';
+import { isValidKYExistNumber } from '../crawling/isValidKYExistNumber';
// 기존에 등록된 KY 노래방 번호가 실제로 KY 노래방과 일치하는지 검증
// 유효한 곡은 verify_ky_songs 테이블에 insert
@@ -44,9 +44,8 @@ for (const song of data) {
}
index++;
- console.log('crawlYoutubeVerify : ', index);
- if (index >= 2000) break;
+ if (index >= 5000) break;
}
browser.close();
diff --git a/packages/crawling/src/cron/taggingSongs.ts b/packages/crawling/src/cron/taggingSongs.ts
new file mode 100644
index 0000000..2eeb7f5
--- /dev/null
+++ b/packages/crawling/src/cron/taggingSongs.ts
@@ -0,0 +1,59 @@
+import { getSongTagSongIdsDB, getSongsAllDB } from '@/supabase/getDB';
+import { postSongTagsDB } from '@/supabase/postDB';
+import { autoTagSong } from '@/utils/getSongTag';
+
+const resultsLog = {
+ success: 0,
+ failed: 0,
+ skipped: 0,
+};
+
+// 1. 전체 곡 조회 + 이미 태그된 곡 ID 로드
+const [allSongs, taggedSongIds] = await Promise.all([getSongsAllDB(), getSongTagSongIdsDB()]);
+
+console.log('전체 곡 수:', allSongs.length);
+console.log('이미 태그된 곡 수:', taggedSongIds.size);
+
+// 2. 순차 순회 (테스트: 5회만 실행)
+let processedCount = 0;
+for (const song of allSongs) {
+ if (processedCount >= 5000) break;
+ if (taggedSongIds.has(song.id)) {
+ resultsLog.skipped++;
+ continue;
+ }
+
+ try {
+ const tagIds = await autoTagSong(song.title, song.artist);
+
+ if (tagIds.length === 0) {
+ resultsLog.failed++;
+ console.log(`[FAIL] ${song.title} - ${song.artist}: 태그 없음`);
+ continue;
+ }
+
+ const success = await postSongTagsDB(song.id, tagIds);
+ if (success) {
+ resultsLog.success++;
+ console.log(`[OK] ${song.title} - ${song.artist}: [${tagIds.join(', ')}]`);
+ } else {
+ resultsLog.failed++;
+ }
+ } catch (error) {
+ resultsLog.failed++;
+ console.error(`[ERROR] ${song.title} - ${song.artist}:`, error);
+ }
+
+ processedCount++;
+
+ // OpenAI rate limit 대비 딜레이
+ await new Promise(resolve => setTimeout(resolve, 200));
+}
+
+// 3. 결과 출력
+console.log(`
+ 총 ${allSongs.length}곡 중:
+ - 스킵 (이미 태그됨): ${resultsLog.skipped}곡
+ - 성공: ${resultsLog.success}곡
+ - 실패: ${resultsLog.failed}곡
+`);
diff --git a/packages/crawling/src/supabase/getDB.ts b/packages/crawling/src/supabase/getDB.ts
index 81c7a3c..2cb5655 100644
--- a/packages/crawling/src/supabase/getDB.ts
+++ b/packages/crawling/src/supabase/getDB.ts
@@ -84,3 +84,27 @@ export async function getVerifyKySongsDB(): Promise> {
return new Set(data.map(row => row.id));
}
+
+export async function getSongsAllDB(max: number = 50000) {
+ const supabase = getClient();
+
+ const { data, error } = await supabase
+ .from('songs')
+ .select('id, title, artist')
+ .order('created_at', { ascending: false })
+ .limit(max);
+
+ if (error) throw error;
+
+ return data;
+}
+
+export async function getSongTagSongIdsDB(): Promise> {
+ const supabase = getClient();
+
+ const { data, error } = await supabase.from('song_tags').select('song_id').limit(50000);
+
+ if (error) throw error;
+
+ return new Set(data.map(row => row.song_id));
+}
diff --git a/packages/crawling/src/supabase/postDB.ts b/packages/crawling/src/supabase/postDB.ts
index d53f82d..23417ae 100644
--- a/packages/crawling/src/supabase/postDB.ts
+++ b/packages/crawling/src/supabase/postDB.ts
@@ -52,6 +52,18 @@ export async function postVerifyKySongsDB(song: Song) {
}
}
+export async function postSongTagsDB(songId: string, tagIds: number[]) {
+ const supabase = getClient();
+ const rows = tagIds.map(tagId => ({ song_id: songId, tag_id: tagId }));
+
+ const { error } = await supabase.from('song_tags').insert(rows);
+ if (error) {
+ console.error('postSongTagsDB error:', error);
+ return false;
+ }
+ return true;
+}
+
export async function postInvalidKYSongsDB(song: Song) {
const supabase = getClient();
diff --git a/packages/crawling/src/utils/getSongTag.ts b/packages/crawling/src/utils/getSongTag.ts
new file mode 100644
index 0000000..4999846
--- /dev/null
+++ b/packages/crawling/src/utils/getSongTag.ts
@@ -0,0 +1,92 @@
+import OpenAI from 'openai';
+import dotenv from 'dotenv';
+
+import { getClient } from '@/supabase/getClient';
+
+dotenv.config();
+
+const client = new OpenAI({
+ apiKey: process.env.OPENAI_API_KEY,
+});
+
+// 태그 정보를 담을 타입 정의
+interface Tag {
+ id: number;
+ name: string;
+ category: string;
+}
+
+let cachedTagsPrompt: string | null = null;
+
+/**
+ * DB에서 전체 태그 목록을 읽어와 AI 프롬프트용 텍스트로 변환한다.
+ */
+const getTagsForPrompt = async (): Promise => {
+ if (cachedTagsPrompt) return cachedTagsPrompt;
+
+ const supabase = getClient();
+ const { data: tags, error } = await supabase
+ .from('tags')
+ .select('id, name, category')
+ .order('id');
+
+ if (error) {
+ console.error('Error fetching tags:', error);
+ return '';
+ }
+
+ // AI가 읽기 편하게 "ID: 이름 (카테고리)" 형식으로 변환
+ cachedTagsPrompt = tags.map((tag: Tag) => `${tag.id}: ${tag.name} (${tag.category})`).join('\n');
+ return cachedTagsPrompt;
+};
+
+/**
+ * AI를 활용해 노래에 적절한 태그 ID들을 추출한다.
+ */
+export const autoTagSong = async (title: string, artist: string): Promise => {
+ try {
+ // 1단계: 프롬프트용 태그 리스트 준비
+ const tagsPrompt = await getTagsForPrompt();
+ if (!tagsPrompt) return [];
+
+ // 2단계: OpenAI API 호출
+ const response = await client.chat.completions.create({
+ model: 'gpt-4o-mini', // 가성비가 좋은 모델 사용
+ messages: [
+ {
+ role: 'system',
+ content: `
+ You are a music database expert. Based on the song title and artist, categorize the song by selecting appropriate tag IDs from the provided list.
+
+ Guidelines:
+ 1. Select at least one tag, but no more than 4.
+ 2. Prioritize Language (100s), then Genre (200s), then Origin (300s).
+ 3. If it's Japanese music, ALWAYS include 101 (J-POP).
+ 4. Be precise. If it's from an Anime, use 302 (애니메이션).
+ 5. Return only JSON: {"tag_ids": [number, number, ...]}
+
+ Allowed Tags List:
+ ${tagsPrompt}
+ `,
+ },
+ {
+ role: 'user',
+ content: `Title: "${title}", Artist: "${artist}"`,
+ },
+ ],
+ response_format: { type: 'json_object' },
+ temperature: 0, // 결과의 일관성을 위해 0으로 설정
+ max_tokens: 50, // 결과가 짧으므로 토큰 제한
+ });
+
+ const content = response.choices[0].message.content;
+ if (!content) return [];
+
+ // 3단계: 결과 파싱 및 반환
+ const result: { tag_ids: number[] } = JSON.parse(content);
+ return result.tag_ids;
+ } catch (error) {
+ console.error('Error auto-tagging song:', error);
+ return [];
+ }
+};