- api/schedules, albums, members → api/public/로 이동 - pages/pc/*.jsx → pages/pc/public/로 이동 - pages/mobile/*.jsx → pages/mobile/public/로 이동 - App.jsx 라우터 경로 수정 - 모든 public 페이지의 import 경로 수정
229 lines
6.2 KiB
JavaScript
229 lines
6.2 KiB
JavaScript
const https = require("https");
|
|
const http = require("http");
|
|
const mysql = require("mysql2/promise");
|
|
|
|
// Settings
// Base URL of the Nitter instance that search requests are sent to.
const NITTER_URL = "http://nitter:8080";
// Account whose tweets are searched for and stored.
const USERNAME = "realfromis_9";
// Pause between page requests, in milliseconds (tripled after a failed request).
const DELAY_MS = 1500;

// Search window (X account migration date ~ starting point of the existing scrape)
const SEARCH_SINCE = "2025-04-24";
const SEARCH_UNTIL = "2025-06-16";

// DB connection: each value comes from the environment, with a built-in
// fallback used when the variable is unset or empty.
const dbConfig = {
  host: process.env.DB_HOST || "mariadb",
  user: process.env.DB_USER || "fromis9_user",
  password: process.env.DB_PASSWORD || "fromis9_password",
  database: process.env.DB_NAME || "fromis9",
};
|
|
|
|
/**
 * Performs a GET request and resolves with the response body as text.
 *
 * The client module is chosen from the URL scheme: `https` when the URL
 * starts with "https", otherwise `http`.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string>} The response body decoded as UTF-8.
 * @throws {Error} Rejects on request/response stream errors and on any
 *   HTTP status of 400 or above.
 */
async function fetchPage(url) {
  return new Promise((resolve, reject) => {
    const client = url.startsWith("https") ? https : http;
    client
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode >= 400) {
          // Drain the body so the socket is released, then surface the
          // failure instead of handing an error page to the HTML parser.
          res.resume();
          reject(new Error(`HTTP ${statusCode} for ${url}`));
          return;
        }

        // Decode through a streaming UTF-8 decoder: appending raw Buffer
        // chunks to a string corrupts any multi-byte character that happens
        // to be split across two chunks.
        res.setEncoding("utf8");

        let data = "";
        res.on("data", (chunk) => (data += chunk));
        res.on("end", () => resolve(data));
        res.on("error", reject);
      })
      .on("error", reject);
  });
}
|
|
|
|
/**
 * Converts a scraped timestamp string into "YYYY-MM-DD HH:MM:SS" (UTC).
 *
 * The first " · " separator and the first " UTC" marker are removed, then
 * the remainder is re-parsed with an explicit " UTC" suffix.
 * NOTE(review): presumably the input looks like "Apr 24, 2025 · 3:00 PM UTC"
 * (the `title` attribute of a tweet date link) — confirm against real markup.
 *
 * @param {string} timeStr - Raw timestamp text.
 * @returns {string|null} Formatted UTC date-time, or null when the input is
 *   falsy, not a string, or cannot be parsed.
 */
function parseDateTime(timeStr) {
  if (!timeStr) {
    return null;
  }

  try {
    const withoutSeparator = timeStr.replace(" · ", " ");
    const withoutZone = withoutSeparator.replace(" UTC", "");
    const parsed = new Date(withoutZone + " UTC");

    if (isNaN(parsed.getTime())) {
      return null;
    }

    // "2025-04-24T15:00:00.000Z" -> "2025-04-24 15:00:00"
    const isoSeconds = parsed.toISOString().slice(0, 19);
    return isoSeconds.replace("T", " ");
  } catch (e) {
    // Reached when timeStr is truthy but has no string methods.
    return null;
  }
}
|
|
|
|
/**
 * Extracts tweet records from a search-results HTML page.
 *
 * The page is cut at every `class="timeline-item ` marker; each fragment
 * after the first is treated as one candidate tweet. Fragments without a
 * `/<user>/status/<id>` link are skipped.
 *
 * @param {string} html - Raw HTML of one search-results page.
 * @returns {Array<Object>} One object per tweet with the fields `isPinned`
 *   (always false), `isRetweet`, `id`, `time` (via parseDateTime, or null),
 *   `text` (only set when a tweet-content block is found), `images`,
 *   `hasVideo` and `url`.
 */
function extractSearchTweets(html) {
  const tweets = [];
  const fragments = html.split('class="timeline-item ').slice(1);

  for (const container of fragments) {
    const linkMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/);
    if (!linkMatch) {
      // Not a tweet (no status link): nothing to record for this fragment.
      continue;
    }
    const id = linkMatch[1];

    const timeMatch = container.match(
      /<span class="tweet-date"[^>]*><a[^>]*title="([^"]+)"/
    );

    const tweet = {
      isPinned: false,
      isRetweet: container.includes('class="retweet-header"'),
      id,
      time: timeMatch ? parseDateTime(timeMatch[1]) : null,
    };

    const contentMatch = container.match(
      /<div class="tweet-content[^"]*"[^>]*>([\s\S]*?)<\/div>/
    );
    if (contentMatch) {
      tweet.text = contentMatch[1]
        .replace(/<br\s*\/?>/g, "\n") // keep line breaks
        .replace(/<a[^>]*>([^<]*)<\/a>/g, "$1") // keep link text only
        .replace(/<[^>]+>/g, "") // drop any remaining tags
        .trim();
    }

    tweet.images = [];
    for (const [, encodedPath] of container.matchAll(/href="\/pic\/([^"]+)"/g)) {
      let imagePath;
      try {
        imagePath = decodeURIComponent(encodedPath);
      } catch {
        // Malformed percent-escape: keep the raw path rather than letting a
        // single bad link abort extraction of the whole page.
        imagePath = encodedPath;
      }
      tweet.images.push("https://pbs.twimg.com/" + imagePath);
    }

    tweet.hasVideo =
      container.includes("gallery-video") ||
      container.includes("video-container");

    tweet.url = `https://x.com/${USERNAME}/status/${id}`;

    tweets.push(tweet);
  }

  return tweets;
}
|
|
|
|
/**
 * Finds the pagination cursor for the next results page.
 *
 * Looks for an element whose class attribute is exactly "show-more",
 * immediately followed by a link whose href carries a `cursor=` parameter,
 * and returns that parameter's value (up to the next `"` or `&`).
 *
 * @param {string} html - Raw HTML of one search-results page.
 * @returns {string|null} The cursor value as it appears in the href, or
 *   null when no such link exists.
 */
function extractNextCursor(html) {
  const showMorePattern =
    /class="show-more"[^>]*>\s*<a href="[^"]*cursor=([^"&]+)/;
  const found = html.match(showMorePattern);

  if (!found) {
    return null;
  }
  return found[1];
}
|
|
|
|
/**
 * Inserts tweets into the `x_tweets` table one at a time.
 *
 * Uses INSERT IGNORE and counts only the statements that report at least
 * one affected row. A failure on one tweet is logged and does not stop the
 * remaining inserts.
 *
 * @param {Object} pool - Object exposing `query(sql, params)` that resolves
 *   to an array whose first element has an `affectedRows` property.
 * @param {Array<Object>} tweets - Tweet records to store.
 * @returns {Promise<number>} Number of tweets counted as saved.
 */
async function saveTweets(pool, tweets) {
  const insertSql = `INSERT IGNORE INTO x_tweets (id, username, text, created_at, is_retweet, is_pinned, images, has_video, url)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`;

  let insertedCount = 0;

  for (const tweet of tweets) {
    try {
      const params = [
        tweet.id,
        USERNAME,
        tweet.text,
        tweet.time,
        tweet.isRetweet,
        tweet.isPinned,
        JSON.stringify(tweet.images), // image list is stored as a JSON string
        tweet.hasVideo,
        tweet.url,
      ];
      const [outcome] = await pool.query(insertSql, params);
      if (outcome.affectedRows > 0) {
        insertedCount += 1;
      }
    } catch (e) {
      console.error(`저장 오류 (ID: ${tweet.id}):`, e.message);
    }
  }

  return insertedCount;
}
|
|
|
|
/**
 * Runs the search scrape for the configured account and date window.
 *
 * Pages through `${NITTER_URL}/search` using the cursor found on each page,
 * stores extracted tweets via saveTweets, prints overall table statistics,
 * closes the pool and exits the process with status 0.
 *
 * The loop stops when:
 *  - three consecutive successfully fetched pages contain no tweets,
 *  - a page has no next-page cursor, or
 *  - five consecutive attempts fail with an error.
 *
 * @returns {Promise<void>} Does not resolve in practice: the process exits.
 * @throws Propagates errors from pool creation or the statistics query; the
 *   pool is closed before the error reaches the caller.
 */
async function main() {
  const MAX_EMPTY_PAGES = 3;
  const MAX_CONSECUTIVE_ERRORS = 5;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log("=".repeat(60));
  console.log("X 트윗 검색 스크래핑 (누락 기간)");
  console.log("=".repeat(60));
  console.log(`대상: @${USERNAME}`);
  console.log(`기간: ${SEARCH_SINCE} ~ ${SEARCH_UNTIL}`);
  console.log("");

  const pool = await mysql.createPool(dbConfig);

  try {
    const searchQuery = encodeURIComponent(
      `from:${USERNAME} since:${SEARCH_SINCE} until:${SEARCH_UNTIL}`
    );
    let cursor = null;
    let pageNum = 1;
    let totalSaved = 0;
    // Empty pages and failed requests are tracked separately so that each
    // stop condition (and its log message) reflects what actually happened.
    let consecutiveEmpty = 0;
    let consecutiveErrors = 0;

    while (true) {
      // The cursor is inserted exactly as it appeared in the page's href.
      const url = cursor
        ? `${NITTER_URL}/search?f=tweets&q=${searchQuery}&cursor=${cursor}`
        : `${NITTER_URL}/search?f=tweets&q=${searchQuery}`;

      console.log(`[페이지 ${pageNum}] 검색 중...`);

      try {
        const html = await fetchPage(url);
        const tweets = extractSearchTweets(html);
        consecutiveErrors = 0;

        if (tweets.length === 0) {
          consecutiveEmpty++;
          console.log(`  -> 트윗 없음 (연속 ${consecutiveEmpty}회)`);
          if (consecutiveEmpty >= MAX_EMPTY_PAGES) {
            console.log("\n연속 3페이지 트윗 없음. 스크래핑 완료.");
            break;
          }
        } else {
          consecutiveEmpty = 0;
          const saved = await saveTweets(pool, tweets);
          totalSaved += saved;
          console.log(
            `  -> ${tweets.length}개 추출, ${saved}개 저장 (누적: ${totalSaved})`
          );
        }

        const nextCursor = extractNextCursor(html);
        if (!nextCursor) {
          console.log("\n다음 페이지 없음. 스크래핑 완료.");
          break;
        }

        cursor = nextCursor;
        pageNum++;

        await sleep(DELAY_MS);
      } catch (error) {
        // cursor/pageNum are unchanged here, so the same page is retried
        // after a longer pause.
        console.error(`  -> 오류: ${error.message}`);
        consecutiveErrors++;
        if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
          console.log("\n연속 오류. 스크래핑 중단.");
          break;
        }
        await sleep(DELAY_MS * 3);
      }
    }

    console.log("\n" + "=".repeat(60));
    console.log("검색 스크래핑 완료");
    console.log(`추가 저장: ${totalSaved}개`);
    console.log("=".repeat(60));

    const [stats] = await pool.query(`
      SELECT
        COUNT(*) as total,
        SUM(is_retweet) as retweets,
        SUM(NOT is_retweet) as original,
        MIN(created_at) as oldest,
        MAX(created_at) as newest
      FROM x_tweets
    `);
    console.log("\n[전체 통계]");
    console.log(stats[0]);
  } finally {
    // Release DB connections even when the loop or the stats query throws.
    await pool.end();
  }

  process.exit(0);
}
|
|
|
|
// Script entry point: start the scraper. Any error that escapes main() is
// logged and the process exits with status 1.
main().catch((fatalError) => {
  console.error("치명적 오류:", fatalError);
  process.exit(1);
});
|