From e994aa08caa6b7227c2dbe24e851610e2d3af4fd Mon Sep 17 00:00:00 2001 From: caadiq Date: Fri, 9 Jan 2026 22:00:14 +0900 Subject: [PATCH] =?UTF-8?q?refactor:=20API=20=EB=B0=8F=20=ED=8E=98?= =?UTF-8?q?=EC=9D=B4=EC=A7=80=20=ED=8F=B4=EB=8D=94=20=EA=B5=AC=EC=A1=B0=20?= =?UTF-8?q?=EC=A0=95=EB=A6=AC=20(2/3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - api/schedules, albums, members → api/public/로 이동 - pages/pc/*.jsx → pages/pc/public/로 이동 - pages/mobile/*.jsx → pages/mobile/public/로 이동 - App.jsx 라우터 경로 수정 - 모든 public 페이지의 import 경로 수정 --- backend/scrape_all.cjs | 241 ++++++++++++++++++ backend/scrape_all.js | 239 +++++++++++++++++ backend/scrape_log.txt | 111 ++++++++ backend/scrape_search.cjs | 229 +++++++++++++++++ backend/scrape_search_log.txt | 34 +++ frontend/src/App.jsx | 24 +- frontend/src/api/{ => public}/albums.js | 0 frontend/src/api/{ => public}/members.js | 0 frontend/src/api/{ => public}/schedules.js | 0 .../src/pages/mobile/{ => public}/Album.jsx | 0 .../pages/mobile/{ => public}/AlbumDetail.jsx | 0 .../mobile/{ => public}/AlbumGallery.jsx | 0 .../src/pages/mobile/{ => public}/Home.jsx | 2 +- .../src/pages/mobile/{ => public}/Members.jsx | 0 .../pages/mobile/{ => public}/Schedule.jsx | 0 frontend/src/pages/pc/{ => public}/Album.jsx | 0 .../src/pages/pc/{ => public}/AlbumDetail.jsx | 0 .../pages/pc/{ => public}/AlbumGallery.jsx | 0 frontend/src/pages/pc/{ => public}/Home.jsx | 2 +- .../src/pages/pc/{ => public}/Members.jsx | 0 .../src/pages/pc/{ => public}/Schedule.jsx | 4 +- 21 files changed, 870 insertions(+), 16 deletions(-) create mode 100644 backend/scrape_all.cjs create mode 100644 backend/scrape_all.js create mode 100644 backend/scrape_log.txt create mode 100644 backend/scrape_search.cjs create mode 100644 backend/scrape_search_log.txt rename frontend/src/api/{ => public}/albums.js (100%) rename frontend/src/api/{ => public}/members.js (100%) rename frontend/src/api/{ => public}/schedules.js (100%) 
rename frontend/src/pages/mobile/{ => public}/Album.jsx (100%) rename frontend/src/pages/mobile/{ => public}/AlbumDetail.jsx (100%) rename frontend/src/pages/mobile/{ => public}/AlbumGallery.jsx (100%) rename frontend/src/pages/mobile/{ => public}/Home.jsx (99%) rename frontend/src/pages/mobile/{ => public}/Members.jsx (100%) rename frontend/src/pages/mobile/{ => public}/Schedule.jsx (100%) rename frontend/src/pages/pc/{ => public}/Album.jsx (100%) rename frontend/src/pages/pc/{ => public}/AlbumDetail.jsx (100%) rename frontend/src/pages/pc/{ => public}/AlbumGallery.jsx (100%) rename frontend/src/pages/pc/{ => public}/Home.jsx (99%) rename frontend/src/pages/pc/{ => public}/Members.jsx (100%) rename frontend/src/pages/pc/{ => public}/Schedule.jsx (99%) diff --git a/backend/scrape_all.cjs b/backend/scrape_all.cjs new file mode 100644 index 0000000..22e4f92 --- /dev/null +++ b/backend/scrape_all.cjs @@ -0,0 +1,241 @@ +const https = require("https"); +const http = require("http"); +const mysql = require("mysql2/promise"); + +// 설정 +const NITTER_URL = "http://nitter:8080"; +const USERNAME = "realfromis_9"; +const DELAY_MS = 1000; // 페이지 간 딜레이 + +// DB 연결 +const dbConfig = { + host: process.env.DB_HOST || "mariadb", + user: process.env.DB_USER || "fromis9_user", + password: process.env.DB_PASSWORD || "fromis9_password", + database: process.env.DB_NAME || "fromis9", +}; + +async function fetchPage(url) { + return new Promise((resolve, reject) => { + const client = url.startsWith("https") ? 
https : http; + client + .get(url, (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => resolve(data)); + }) + .on("error", reject); + }); +} + +function parseDateTime(timeStr) { + // "Jan 7, 2026 · 12:00 PM UTC" -> MySQL DATETIME + if (!timeStr) return null; + try { + const cleaned = timeStr.replace(" · ", " ").replace(" UTC", ""); + const date = new Date(cleaned + " UTC"); + if (isNaN(date.getTime())) return null; + return date.toISOString().slice(0, 19).replace("T", " "); + } catch (e) { + return null; + } +} + +function extractTweets(html) { + const tweets = []; + const tweetContainers = html.split('class="timeline-item '); + + for (let i = 1; i < tweetContainers.length; i++) { + const container = tweetContainers[i]; + const tweet = {}; + + // 고정 트윗 체크 + tweet.isPinned = + tweetContainers[i - 1].includes("pinned") || container.includes("Pinned"); + + // 리트윗 체크 + tweet.isRetweet = container.includes('class="retweet-header"'); + + // 트윗 ID 추출 + const linkMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/); + tweet.id = linkMatch ? linkMatch[1] : null; + + // 시간 추출 + const timeMatch = container.match( + /]*>]*title="([^"]+)"/ + ); + tweet.time = timeMatch ? parseDateTime(timeMatch[1]) : null; + + // 텍스트 내용 추출 + const contentMatch = container.match( + /
]*>([\s\S]*?)<\/div>/ + ); + if (contentMatch) { + tweet.text = contentMatch[1] + .replace(//g, "\n") + .replace(/]*>([^<]*)<\/a>/g, "$1") + .replace(/<[^>]+>/g, "") + .trim(); + } + + // 이미지 URL 추출 + const imageMatches = container.match(/href="\/pic\/([^"]+)"/g); + tweet.images = []; + if (imageMatches) { + imageMatches.forEach((match) => { + const urlMatch = match.match(/href="\/pic\/([^"]+)"/); + if (urlMatch) { + const decoded = decodeURIComponent(urlMatch[1]); + // 전체 URL로 변환 + tweet.images.push("https://pbs.twimg.com/" + decoded); + } + }); + } + + // 비디오 체크 + tweet.hasVideo = + container.includes("gallery-video") || + container.includes("video-container"); + + // URL 생성 + tweet.url = tweet.id + ? `https://x.com/${USERNAME}/status/${tweet.id}` + : null; + + if (tweet.id) { + tweets.push(tweet); + } + } + + return tweets; +} + +function extractNextCursor(html) { + // show-more 링크에서 cursor 추출 + const cursorMatch = html.match( + /class="show-more"[^>]*>\s* 트윗 없음 (연속 ${consecutiveEmpty}회)`); + if (consecutiveEmpty >= 3) { + console.log("\n연속 3페이지 트윗 없음. 스크래핑 완료."); + break; + } + } else { + consecutiveEmpty = 0; + const saved = await saveTweets(pool, tweets); + totalSaved += saved; + console.log( + ` -> ${tweets.length}개 추출, ${saved}개 저장 (누적: ${totalSaved})` + ); + } + + // 다음 페이지 cursor 추출 + const nextCursor = extractNextCursor(html); + if (!nextCursor) { + console.log("\n다음 페이지 없음. 스크래핑 완료."); + break; + } + + cursor = nextCursor; + pageNum++; + + // 딜레이 + await new Promise((r) => setTimeout(r, DELAY_MS)); + } catch (error) { + console.error(` -> 오류: ${error.message}`); + consecutiveEmpty++; + if (consecutiveEmpty >= 5) { + console.log("\n연속 오류. 
스크래핑 중단."); + break; + } + await new Promise((r) => setTimeout(r, DELAY_MS * 3)); + } + } + + console.log("\n" + "=".repeat(60)); + console.log("스크래핑 완료"); + console.log(`총 저장: ${totalSaved}개`); + console.log("=".repeat(60)); + + // 통계 출력 + const [stats] = await pool.query(` + SELECT + COUNT(*) as total, + SUM(is_retweet) as retweets, + SUM(NOT is_retweet) as original, + SUM(has_video) as with_video, + MIN(created_at) as oldest, + MAX(created_at) as newest + FROM x_tweets + `); + console.log("\n[통계]"); + console.log(stats[0]); + + await pool.end(); + process.exit(0); +} + +main().catch((err) => { + console.error("치명적 오류:", err); + process.exit(1); +}); diff --git a/backend/scrape_all.js b/backend/scrape_all.js new file mode 100644 index 0000000..83fd09f --- /dev/null +++ b/backend/scrape_all.js @@ -0,0 +1,239 @@ +const https = require("https"); +const http = require("http"); +const mysql = require("mysql2/promise"); + +// 설정 +const NITTER_URL = "http://nitter:8080"; +const USERNAME = "realfromis_9"; +const DELAY_MS = 1000; // 페이지 간 딜레이 + +// DB 연결 +const dbConfig = { + host: process.env.DB_HOST || "mariadb", + user: process.env.DB_USER || "fromis9_user", + password: process.env.DB_PASSWORD || "fromis9_password", + database: process.env.DB_NAME || "fromis9", +}; + +async function fetchPage(url) { + return new Promise((resolve, reject) => { + const client = url.startsWith("https") ? 
https : http; + client + .get(url, (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => resolve(data)); + }) + .on("error", reject); + }); +} + +function parseDateTime(timeStr) { + // "Jan 7, 2026 · 12:00 PM UTC" -> MySQL DATETIME + if (!timeStr) return null; + try { + const cleaned = timeStr.replace(" · ", " ").replace(" UTC", ""); + const date = new Date(cleaned + " UTC"); + if (isNaN(date.getTime())) return null; + return date.toISOString().slice(0, 19).replace("T", " "); + } catch (e) { + return null; + } +} + +function extractTweets(html) { + const tweets = []; + const tweetContainers = html.split('class="timeline-item '); + + for (let i = 1; i < tweetContainers.length; i++) { + const container = tweetContainers[i]; + const tweet = {}; + + // 고정 트윗 체크 + tweet.isPinned = + tweetContainers[i - 1].includes("pinned") || container.includes("Pinned"); + + // 리트윗 체크 + tweet.isRetweet = container.includes('class="retweet-header"'); + + // 트윗 ID 추출 + const linkMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/); + tweet.id = linkMatch ? linkMatch[1] : null; + + // 시간 추출 + const timeMatch = container.match( + /]*>]*title="([^"]+)"/ + ); + tweet.time = timeMatch ? parseDateTime(timeMatch[1]) : null; + + // 텍스트 내용 추출 + const contentMatch = container.match( + /
]*>([\s\S]*?)<\/div>/ + ); + if (contentMatch) { + tweet.text = contentMatch[1] + .replace(//g, "\n") + .replace(/]*>([^<]*)<\/a>/g, "$1") + .replace(/<[^>]+>/g, "") + .trim(); + } + + // 이미지 URL 추출 + const imageMatches = container.match(/href="\/pic\/([^"]+)"/g); + tweet.images = []; + if (imageMatches) { + imageMatches.forEach((match) => { + const urlMatch = match.match(/href="\/pic\/([^"]+)"/); + if (urlMatch) { + const decoded = decodeURIComponent(urlMatch[1]); + // 전체 URL로 변환 + tweet.images.push("https://pbs.twimg.com/" + decoded); + } + }); + } + + // 비디오 체크 + tweet.hasVideo = + container.includes("gallery-video") || + container.includes("video-container"); + + // URL 생성 + tweet.url = tweet.id + ? `https://x.com/${USERNAME}/status/${tweet.id}` + : null; + + if (tweet.id) { + tweets.push(tweet); + } + } + + return tweets; +} + +function extractNextCursor(html) { + // Load more 링크에서 cursor 추출 + const cursorMatch = html.match(/href="\/[^?]+\?cursor=([^"]+)"/); + return cursorMatch ? cursorMatch[1] : null; +} + +async function saveTweets(pool, tweets) { + let saved = 0; + for (const tweet of tweets) { + try { + await pool.query( + ` + INSERT IGNORE INTO x_tweets (id, username, text, created_at, is_retweet, is_pinned, images, has_video, url) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + `, + [ + tweet.id, + USERNAME, + tweet.text, + tweet.time, + tweet.isRetweet, + tweet.isPinned, + JSON.stringify(tweet.images), + tweet.hasVideo, + tweet.url, + ] + ); + saved++; + } catch (e) { + console.error(`저장 오류 (ID: ${tweet.id}):`, e.message); + } + } + return saved; +} + +async function main() { + console.log("=".repeat(60)); + console.log("X 트윗 전체 스크래핑 시작"); + console.log("=".repeat(60)); + console.log(`대상: @${USERNAME}`); + console.log(`Nitter: ${NITTER_URL}`); + console.log(""); + + const pool = await mysql.createPool(dbConfig); + + let cursor = null; + let pageNum = 1; + let totalSaved = 0; + let consecutiveEmpty = 0; + + while (true) { + const url = cursor + ? 
`${NITTER_URL}/${USERNAME}?cursor=${cursor}` + : `${NITTER_URL}/${USERNAME}`; + + console.log(`[페이지 ${pageNum}] 스크래핑 중...`); + + try { + const html = await fetchPage(url); + const tweets = extractTweets(html); + + if (tweets.length === 0) { + consecutiveEmpty++; + console.log(` -> 트윗 없음 (연속 ${consecutiveEmpty}회)`); + if (consecutiveEmpty >= 3) { + console.log("\n연속 3페이지 트윗 없음. 스크래핑 완료."); + break; + } + } else { + consecutiveEmpty = 0; + const saved = await saveTweets(pool, tweets); + totalSaved += saved; + console.log( + ` -> ${tweets.length}개 추출, ${saved}개 저장 (누적: ${totalSaved})` + ); + } + + // 다음 페이지 cursor 추출 + const nextCursor = extractNextCursor(html); + if (!nextCursor) { + console.log("\n다음 페이지 없음. 스크래핑 완료."); + break; + } + + cursor = nextCursor; + pageNum++; + + // 딜레이 + await new Promise((r) => setTimeout(r, DELAY_MS)); + } catch (error) { + console.error(` -> 오류: ${error.message}`); + consecutiveEmpty++; + if (consecutiveEmpty >= 5) { + console.log("\n연속 오류. 스크래핑 중단."); + break; + } + await new Promise((r) => setTimeout(r, DELAY_MS * 3)); + } + } + + console.log("\n" + "=".repeat(60)); + console.log("스크래핑 완료"); + console.log(`총 저장: ${totalSaved}개`); + console.log("=".repeat(60)); + + // 통계 출력 + const [stats] = await pool.query(` + SELECT + COUNT(*) as total, + SUM(is_retweet) as retweets, + SUM(NOT is_retweet) as original, + SUM(has_video) as with_video, + MIN(created_at) as oldest, + MAX(created_at) as newest + FROM x_tweets + `); + console.log("\n[통계]"); + console.log(stats[0]); + + await pool.end(); + process.exit(0); +} + +main().catch((err) => { + console.error("치명적 오류:", err); + process.exit(1); +}); diff --git a/backend/scrape_log.txt b/backend/scrape_log.txt new file mode 100644 index 0000000..f6c495f --- /dev/null +++ b/backend/scrape_log.txt @@ -0,0 +1,111 @@ +============================================================ +X 트윗 전체 스크래핑 시작 +============================================================ +대상: @realfromis_9 +Nitter: 
http://nitter:8080 + +[페이지 1] 스크래핑 중... + -> 21개 추출, 21개 저장 (누적: 21) +[페이지 2] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 40) +[페이지 3] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 60) +[페이지 4] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 79) +[페이지 5] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 99) +[페이지 6] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 119) +[페이지 7] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 139) +[페이지 8] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 159) +[페이지 9] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 179) +[페이지 10] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 199) +[페이지 11] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 219) +[페이지 12] 스크래핑 중... + -> 18개 추출, 18개 저장 (누적: 237) +[페이지 13] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 257) +[페이지 14] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 276) +[페이지 15] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 296) +[페이지 16] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 316) +[페이지 17] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 336) +[페이지 18] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 355) +[페이지 19] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 375) +[페이지 20] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 395) +[페이지 21] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 415) +[페이지 22] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 435) +[페이지 23] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 455) +[페이지 24] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 475) +[페이지 25] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 495) +[페이지 26] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 515) +[페이지 27] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 535) +[페이지 28] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 555) +[페이지 29] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 575) +[페이지 30] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 595) +[페이지 31] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 615) +[페이지 32] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 634) +[페이지 33] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 654) +[페이지 34] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 674) +[페이지 35] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 694) +[페이지 36] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 714) +[페이지 37] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 733) +[페이지 38] 스크래핑 중... + -> 18개 추출, 18개 저장 (누적: 751) +[페이지 39] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 771) +[페이지 40] 스크래핑 중... 
+ -> 20개 추출, 20개 저장 (누적: 791) +[페이지 41] 스크래핑 중... + -> 20개 추출, 20개 저장 (누적: 811) +[페이지 42] 스크래핑 중... + -> 19개 추출, 19개 저장 (누적: 830) +[페이지 43] 스크래핑 중... + -> 10개 추출, 10개 저장 (누적: 840) +[페이지 44] 스크래핑 중... + -> 트윗 없음 (연속 1회) + +다음 페이지 없음. 스크래핑 완료. + +============================================================ +스크래핑 완료 +총 저장: 840개 +============================================================ + +[통계] +{ + total: 840, + retweets: '244', + original: '596', + with_video: '58', + oldest: 2025-06-16T12:01:00.000Z, + newest: 2026-01-07T12:00:00.000Z +} diff --git a/backend/scrape_search.cjs b/backend/scrape_search.cjs new file mode 100644 index 0000000..8db3e53 --- /dev/null +++ b/backend/scrape_search.cjs @@ -0,0 +1,229 @@ +const https = require("https"); +const http = require("http"); +const mysql = require("mysql2/promise"); + +// 설정 +const NITTER_URL = "http://nitter:8080"; +const USERNAME = "realfromis_9"; +const DELAY_MS = 1500; + +// 검색 기간 (X 계정 이관일 ~ 기존 스크래핑 시작점) +const SEARCH_SINCE = "2025-04-24"; +const SEARCH_UNTIL = "2025-06-16"; + +// DB 연결 +const dbConfig = { + host: process.env.DB_HOST || "mariadb", + user: process.env.DB_USER || "fromis9_user", + password: process.env.DB_PASSWORD || "fromis9_password", + database: process.env.DB_NAME || "fromis9", +}; + +async function fetchPage(url) { + return new Promise((resolve, reject) => { + const client = url.startsWith("https") ? 
https : http; + client + .get(url, (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => resolve(data)); + }) + .on("error", reject); + }); +} + +function parseDateTime(timeStr) { + if (!timeStr) return null; + try { + const cleaned = timeStr.replace(" · ", " ").replace(" UTC", ""); + const date = new Date(cleaned + " UTC"); + if (isNaN(date.getTime())) return null; + return date.toISOString().slice(0, 19).replace("T", " "); + } catch (e) { + return null; + } +} + +function extractSearchTweets(html) { + const tweets = []; + const tweetContainers = html.split('class="timeline-item '); + + for (let i = 1; i < tweetContainers.length; i++) { + const container = tweetContainers[i]; + const tweet = {}; + + tweet.isPinned = false; + tweet.isRetweet = container.includes('class="retweet-header"'); + + const linkMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/); + tweet.id = linkMatch ? linkMatch[1] : null; + + const timeMatch = container.match( + /]*>]*title="([^"]+)"/ + ); + tweet.time = timeMatch ? parseDateTime(timeMatch[1]) : null; + + const contentMatch = container.match( + /
]*>([\s\S]*?)<\/div>/ + ); + if (contentMatch) { + tweet.text = contentMatch[1] + .replace(//g, "\n") + .replace(/]*>([^<]*)<\/a>/g, "$1") + .replace(/<[^>]+>/g, "") + .trim(); + } + + const imageMatches = container.match(/href="\/pic\/([^"]+)"/g); + tweet.images = []; + if (imageMatches) { + imageMatches.forEach((match) => { + const urlMatch = match.match(/href="\/pic\/([^"]+)"/); + if (urlMatch) { + const decoded = decodeURIComponent(urlMatch[1]); + tweet.images.push("https://pbs.twimg.com/" + decoded); + } + }); + } + + tweet.hasVideo = + container.includes("gallery-video") || + container.includes("video-container"); + + tweet.url = tweet.id + ? `https://x.com/${USERNAME}/status/${tweet.id}` + : null; + + if (tweet.id) { + tweets.push(tweet); + } + } + + return tweets; +} + +function extractNextCursor(html) { + const cursorMatch = html.match( + /class="show-more"[^>]*>\s* 0) saved++; + } catch (e) { + console.error(`저장 오류 (ID: ${tweet.id}):`, e.message); + } + } + return saved; +} + +async function main() { + console.log("=".repeat(60)); + console.log("X 트윗 검색 스크래핑 (누락 기간)"); + console.log("=".repeat(60)); + console.log(`대상: @${USERNAME}`); + console.log(`기간: ${SEARCH_SINCE} ~ ${SEARCH_UNTIL}`); + console.log(""); + + const pool = await mysql.createPool(dbConfig); + + const searchQuery = encodeURIComponent( + `from:${USERNAME} since:${SEARCH_SINCE} until:${SEARCH_UNTIL}` + ); + let cursor = null; + let pageNum = 1; + let totalSaved = 0; + let consecutiveEmpty = 0; + + while (true) { + const url = cursor + ? `${NITTER_URL}/search?f=tweets&q=${searchQuery}&cursor=${cursor}` + : `${NITTER_URL}/search?f=tweets&q=${searchQuery}`; + + console.log(`[페이지 ${pageNum}] 검색 중...`); + + try { + const html = await fetchPage(url); + const tweets = extractSearchTweets(html); + + if (tweets.length === 0) { + consecutiveEmpty++; + console.log(` -> 트윗 없음 (연속 ${consecutiveEmpty}회)`); + if (consecutiveEmpty >= 3) { + console.log("\n연속 3페이지 트윗 없음. 
스크래핑 완료."); + break; + } + } else { + consecutiveEmpty = 0; + const saved = await saveTweets(pool, tweets); + totalSaved += saved; + console.log( + ` -> ${tweets.length}개 추출, ${saved}개 저장 (누적: ${totalSaved})` + ); + } + + const nextCursor = extractNextCursor(html); + if (!nextCursor) { + console.log("\n다음 페이지 없음. 스크래핑 완료."); + break; + } + + cursor = nextCursor; + pageNum++; + + await new Promise((r) => setTimeout(r, DELAY_MS)); + } catch (error) { + console.error(` -> 오류: ${error.message}`); + consecutiveEmpty++; + if (consecutiveEmpty >= 5) { + console.log("\n연속 오류. 스크래핑 중단."); + break; + } + await new Promise((r) => setTimeout(r, DELAY_MS * 3)); + } + } + + console.log("\n" + "=".repeat(60)); + console.log("검색 스크래핑 완료"); + console.log(`추가 저장: ${totalSaved}개`); + console.log("=".repeat(60)); + + const [stats] = await pool.query(` + SELECT + COUNT(*) as total, + SUM(is_retweet) as retweets, + SUM(NOT is_retweet) as original, + MIN(created_at) as oldest, + MAX(created_at) as newest + FROM x_tweets + `); + console.log("\n[전체 통계]"); + console.log(stats[0]); + + await pool.end(); + process.exit(0); +} + +main().catch((err) => { + console.error("치명적 오류:", err); + process.exit(1); +}); diff --git a/backend/scrape_search_log.txt b/backend/scrape_search_log.txt new file mode 100644 index 0000000..c49dfd4 --- /dev/null +++ b/backend/scrape_search_log.txt @@ -0,0 +1,34 @@ +============================================================ +X 트윗 검색 스크래핑 (누락 기간) +============================================================ +대상: @realfromis_9 +기간: 2025-04-24 ~ 2025-06-16 + +[페이지 1] 검색 중... + -> 20개 추출, 20개 저장 (누적: 20) +[페이지 2] 검색 중... + -> 20개 추출, 20개 저장 (누적: 40) +[페이지 3] 검색 중... + -> 20개 추출, 20개 저장 (누적: 60) +[페이지 4] 검색 중... + -> 20개 추출, 20개 저장 (누적: 80) +[페이지 5] 검색 중... + -> 14개 추출, 14개 저장 (누적: 94) +[페이지 6] 검색 중... + -> 트윗 없음 (연속 1회) + +다음 페이지 없음. 스크래핑 완료. 
+ +============================================================ +검색 스크래핑 완료 +추가 저장: 94개 +============================================================ + +[전체 통계] +{ + total: 934, + retweets: '244', + original: '690', + oldest: 2025-04-24T12:00:00.000Z, + newest: 2026-01-07T12:00:00.000Z +} diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 910bcdc..673fad6 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -6,20 +6,20 @@ import { BrowserView, MobileView } from 'react-device-detect'; import ScrollToTop from './components/ScrollToTop'; // PC 페이지 -import PCHome from './pages/pc/Home'; -import PCMembers from './pages/pc/Members'; -import PCAlbum from './pages/pc/Album'; -import PCAlbumDetail from './pages/pc/AlbumDetail'; -import PCAlbumGallery from './pages/pc/AlbumGallery'; -import PCSchedule from './pages/pc/Schedule'; +import PCHome from './pages/pc/public/Home'; +import PCMembers from './pages/pc/public/Members'; +import PCAlbum from './pages/pc/public/Album'; +import PCAlbumDetail from './pages/pc/public/AlbumDetail'; +import PCAlbumGallery from './pages/pc/public/AlbumGallery'; +import PCSchedule from './pages/pc/public/Schedule'; // 모바일 페이지 -import MobileHome from './pages/mobile/Home'; -import MobileMembers from './pages/mobile/Members'; -import MobileAlbum from './pages/mobile/Album'; -import MobileAlbumDetail from './pages/mobile/AlbumDetail'; -import MobileAlbumGallery from './pages/mobile/AlbumGallery'; -import MobileSchedule from './pages/mobile/Schedule'; +import MobileHome from './pages/mobile/public/Home'; +import MobileMembers from './pages/mobile/public/Members'; +import MobileAlbum from './pages/mobile/public/Album'; +import MobileAlbumDetail from './pages/mobile/public/AlbumDetail'; +import MobileAlbumGallery from './pages/mobile/public/AlbumGallery'; +import MobileSchedule from './pages/mobile/public/Schedule'; // 관리자 페이지 import AdminLogin from './pages/pc/admin/AdminLogin'; diff --git a/frontend/src/api/albums.js 
b/frontend/src/api/public/albums.js similarity index 100% rename from frontend/src/api/albums.js rename to frontend/src/api/public/albums.js diff --git a/frontend/src/api/members.js b/frontend/src/api/public/members.js similarity index 100% rename from frontend/src/api/members.js rename to frontend/src/api/public/members.js diff --git a/frontend/src/api/schedules.js b/frontend/src/api/public/schedules.js similarity index 100% rename from frontend/src/api/schedules.js rename to frontend/src/api/public/schedules.js diff --git a/frontend/src/pages/mobile/Album.jsx b/frontend/src/pages/mobile/public/Album.jsx similarity index 100% rename from frontend/src/pages/mobile/Album.jsx rename to frontend/src/pages/mobile/public/Album.jsx diff --git a/frontend/src/pages/mobile/AlbumDetail.jsx b/frontend/src/pages/mobile/public/AlbumDetail.jsx similarity index 100% rename from frontend/src/pages/mobile/AlbumDetail.jsx rename to frontend/src/pages/mobile/public/AlbumDetail.jsx diff --git a/frontend/src/pages/mobile/AlbumGallery.jsx b/frontend/src/pages/mobile/public/AlbumGallery.jsx similarity index 100% rename from frontend/src/pages/mobile/AlbumGallery.jsx rename to frontend/src/pages/mobile/public/AlbumGallery.jsx diff --git a/frontend/src/pages/mobile/Home.jsx b/frontend/src/pages/mobile/public/Home.jsx similarity index 99% rename from frontend/src/pages/mobile/Home.jsx rename to frontend/src/pages/mobile/public/Home.jsx index ad38875..9dfc241 100644 --- a/frontend/src/pages/mobile/Home.jsx +++ b/frontend/src/pages/mobile/public/Home.jsx @@ -2,7 +2,7 @@ import { motion } from 'framer-motion'; import { ChevronRight, Clock, Tag } from 'lucide-react'; import { useState, useEffect } from 'react'; import { useNavigate } from 'react-router-dom'; -import { getTodayKST } from '../../utils/date'; +import { getTodayKST } from '../../../utils/date'; // 모바일 홈 페이지 function MobileHome() { diff --git a/frontend/src/pages/mobile/Members.jsx b/frontend/src/pages/mobile/public/Members.jsx 
similarity index 100% rename from frontend/src/pages/mobile/Members.jsx rename to frontend/src/pages/mobile/public/Members.jsx diff --git a/frontend/src/pages/mobile/Schedule.jsx b/frontend/src/pages/mobile/public/Schedule.jsx similarity index 100% rename from frontend/src/pages/mobile/Schedule.jsx rename to frontend/src/pages/mobile/public/Schedule.jsx diff --git a/frontend/src/pages/pc/Album.jsx b/frontend/src/pages/pc/public/Album.jsx similarity index 100% rename from frontend/src/pages/pc/Album.jsx rename to frontend/src/pages/pc/public/Album.jsx diff --git a/frontend/src/pages/pc/AlbumDetail.jsx b/frontend/src/pages/pc/public/AlbumDetail.jsx similarity index 100% rename from frontend/src/pages/pc/AlbumDetail.jsx rename to frontend/src/pages/pc/public/AlbumDetail.jsx diff --git a/frontend/src/pages/pc/AlbumGallery.jsx b/frontend/src/pages/pc/public/AlbumGallery.jsx similarity index 100% rename from frontend/src/pages/pc/AlbumGallery.jsx rename to frontend/src/pages/pc/public/AlbumGallery.jsx diff --git a/frontend/src/pages/pc/Home.jsx b/frontend/src/pages/pc/public/Home.jsx similarity index 99% rename from frontend/src/pages/pc/Home.jsx rename to frontend/src/pages/pc/public/Home.jsx index c9cc756..715488c 100644 --- a/frontend/src/pages/pc/Home.jsx +++ b/frontend/src/pages/pc/public/Home.jsx @@ -2,7 +2,7 @@ import { useState, useEffect } from 'react'; import { motion } from 'framer-motion'; import { Link } from 'react-router-dom'; import { Calendar, ArrowRight, Clock, Link2, Tag } from 'lucide-react'; -import { getTodayKST } from '../../utils/date'; +import { getTodayKST } from '../../../utils/date'; function Home() { const [members, setMembers] = useState([]); diff --git a/frontend/src/pages/pc/Members.jsx b/frontend/src/pages/pc/public/Members.jsx similarity index 100% rename from frontend/src/pages/pc/Members.jsx rename to frontend/src/pages/pc/public/Members.jsx diff --git a/frontend/src/pages/pc/Schedule.jsx b/frontend/src/pages/pc/public/Schedule.jsx 
similarity index 99%
rename from frontend/src/pages/pc/Schedule.jsx
rename to frontend/src/pages/pc/public/Schedule.jsx
index 3930ccd..892d5e0 100644
--- a/frontend/src/pages/pc/Schedule.jsx
+++ b/frontend/src/pages/pc/public/Schedule.jsx
@@ -4,8 +4,8 @@ import { motion, AnimatePresence } from 'framer-motion';
 import { Clock, ChevronLeft, ChevronRight, ChevronDown, Tag, Search, ArrowLeft, Link2 } from 'lucide-react';
 import { useInfiniteQuery } from '@tanstack/react-query';
 import { useInView } from 'react-intersection-observer';
-import { getTodayKST } from '../../utils/date';
-import { getSchedules, getCategories, searchSchedules as searchSchedulesApi } from '../../api/schedules';
+import { getTodayKST } from '../../../utils/date';
+import { getSchedules, getCategories, searchSchedules as searchSchedulesApi } from '../../../api/public/schedules';
 
 function Schedule() {
     const navigate = useNavigate();