diff --git a/backend/scripts/refetch-retweets.js b/backend/scripts/refetch-retweets.js
new file mode 100644
index 0000000..7ecc190
--- /dev/null
+++ b/backend/scripts/refetch-retweets.js
@@ -0,0 +1,95 @@
+/**
+ * 리트윗 데이터 재수집 스크립트
+ * 잘못 저장된 리트윗 일정을 Nitter에서 다시 가져와 수정합니다.
+ *
+ * 사용법: node scripts/refetch-retweets.js [scheduleId1,scheduleId2,...]
+ */
+import mysql from 'mysql2/promise';
+import { fetchSingleTweet, extractTitle } from '../src/services/x/scraper.js';
+
+const NITTER_URL = process.env.NITTER_URL || 'http://nitter:8080'; // base URL handed to fetchSingleTweet
+// Connection settings are read from the environment; each literal below is the fallback value.
+const pool = mysql.createPool({
+  host: process.env.DB_HOST || 'mariadb',
+  port: Number.parseInt(process.env.DB_PORT || '3306', 10), // explicit radix: always parse as decimal
+  user: process.env.DB_USER || 'fromis9',
+  password: process.env.DB_PASSWORD || 'fromis9',
+  database: process.env.DB_NAME || 'fromis9',
+});
+
+/**
+ * Re-fetches stored X posts from Nitter and rewrites their title, username, content and images.
+ *
+ * Targets are the schedule IDs passed as a comma-separated CLI argument or, without an
+ * argument, every schedule_x row whose content matches 'RT @%' or '%nitter%t.co%'.
+ * A row that fails is logged and counted, and the run continues with the next row.
+ *
+ * @returns {Promise<void>} settles after the connection pool has been closed
+ * @throws {Error} if a non-empty ID argument holds no usable ID, or if selecting the rows fails
+ */
+async function main() {
+  try {
+    const idArg = process.argv[2];
+    const argIds = idArg?.split(',').map(Number).filter(Boolean);
+    // A mistyped ID list must fail loudly instead of silently widening the run to every row.
+    if (idArg?.trim() && argIds.length === 0) {
+      throw new Error(`유효한 일정 ID가 없습니다: ${idArg}`);
+    }
+
+    const [rows] = argIds?.length > 0
+      ? await pool.query(
+          `SELECT sx.schedule_id, sx.post_id, sx.username, sx.content
+           FROM schedule_x sx WHERE sx.schedule_id IN (?)`,
+          [argIds]
+        )
+      : await pool.query(
+          `SELECT sx.schedule_id, sx.post_id, sx.username, sx.content
+           FROM schedule_x sx
+           WHERE sx.content LIKE 'RT @%' OR sx.content LIKE '%nitter%t.co%'`
+        );
+    console.log(`대상: ${rows.length}건`);
+    if (rows.length === 0) return;
+
+    let updated = 0;
+    let failed = 0;
+    for (const row of rows) {
+      try {
+        // Take the original author from a stored "RT @user:" prefix, else use the stored username.
+        const rtMatch = row.content?.match(/^RT @(\w+):/);
+        const fetchUsername = rtMatch ? rtMatch[1] : (row.username || 'realfromis_9');
+        console.log(`[${row.schedule_id}] post_id=${row.post_id}, from=@${fetchUsername}`);
+
+        const tweet = await fetchSingleTweet(NITTER_URL, fetchUsername, row.post_id);
+        // Strip a leading "RT @user:" and a trailing ellipsis from the fetched text.
+        const newContent = tweet.text.replace(/^RT @\w+:\s*/, '').replace(/…$/, '').trim();
+        const newTitle = extractTitle(newContent);
+        const newImageUrls = tweet.imageUrls.length > 0 ? JSON.stringify(tweet.imageUrls) : null;
+
+        await pool.query('UPDATE schedules SET title = ? WHERE id = ?', [newTitle, row.schedule_id]);
+        await pool.query(
+          'UPDATE schedule_x SET username = ?, content = ?, image_urls = ? WHERE schedule_id = ?',
+          [fetchUsername, newContent, newImageUrls, row.schedule_id]
+        );
+
+        console.log(` -> title: ${newTitle.substring(0, 60)} | images: ${tweet.imageUrls.length}`);
+        updated++;
+      } catch (err) {
+        console.error(` -> 실패: ${err.message}`);
+        failed++;
+      } finally {
+        // Throttle after failures too, since a failed attempt may still have hit Nitter.
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      }
+    }
+
+    console.log(`\n완료: ${updated}건 수정, ${failed}건 실패`);
+  } finally {
+    // Close the pool on every path (no rows, success, or a thrown error).
+    await pool.end();
+  }
+}
+
+main().catch((fatalError) => {
+  console.error(fatalError); // report whatever rejection escaped main()
+  process.exit(1); // non-zero exit status marks the run as failed
+});
diff --git a/backend/src/routes/admin/x.js b/backend/src/routes/admin/x.js
index 6ec622c..0a8de88 100644
--- a/backend/src/routes/admin/x.js
+++ b/backend/src/routes/admin/x.js
@@ -1,5 +1,5 @@
import { fetchSingleTweet, extractTitle } from '../../services/x/scraper.js';
-import { addOrUpdateSchedule } from '../../services/meilisearch/index.js';
+import { addOrUpdateSchedule, syncScheduleById } from '../../services/meilisearch/index.js';
import { formatDate, formatTime } from '../../utils/date.js';
import config, { CATEGORY_IDS } from '../../config/index.js';
import {
@@ -161,4 +161,122 @@ export default async function xRoutes(fastify) {
return serverError(reply, err.message);
}
});
+
+  /**
+   * POST /api/admin/x/refetch-retweets
+   * Re-fetches stored retweet schedules from Nitter and repairs their title, username,
+   * content and image_urls, then syncs each repaired schedule to Meilisearch.
+   * Requires authentication via the fastify.authenticate preHandler.
+   *
+   * Body: { scheduleIds?: number[] }. When it is missing or empty, every schedule_x row
+   * whose content matches 'RT @%' or '%nitter%t.co%' is processed.
+   * Reply: { success, total, updated, errors }; a row that fails is reported in errors
+   * and does not stop the remaining rows.
+   */
+  fastify.post('/refetch-retweets', {
+    schema: {
+      tags: ['admin/x'],
+      summary: '리트윗 데이터 재수집',
+      description: '잘못 저장된 리트윗 일정을 Nitter에서 다시 가져와 수정합니다.',
+      security: [{ bearerAuth: [] }],
+      body: {
+        type: 'object',
+        properties: {
+          scheduleIds: {
+            type: 'array',
+            items: { type: 'integer' },
+            description: '재수집할 일정 ID 목록 (비어있으면 전체 리트윗 대상)',
+          },
+        },
+      },
+    },
+    preHandler: [fastify.authenticate],
+  }, async (request, reply) => {
+    try {
+      let rows;
+      const { scheduleIds } = request.body || {};
+
+      if (scheduleIds && scheduleIds.length > 0) {
+        // Only the requested schedules.
+        [rows] = await db.query(
+          `SELECT sx.schedule_id, sx.post_id, sx.username, sx.content
+           FROM schedule_x sx
+           WHERE sx.schedule_id IN (?)`,
+          [scheduleIds]
+        );
+      } else {
+        // Rows still stored as "RT @..." or containing a Nitter-proxied t.co link. No image_urls
+        // filter: "image_urls IS NULL AND content LIKE 'RT @%'" would only repeat the first test.
+        [rows] = await db.query(
+          `SELECT sx.schedule_id, sx.post_id, sx.username, sx.content
+           FROM schedule_x sx
+           WHERE sx.content LIKE 'RT @%'
+              OR sx.content LIKE '%nitter%t.co%'`
+        );
+      }
+
+      // Nothing matched: report that instead of running an empty batch.
+      if (rows.length === 0) {
+        return { success: true, message: '재수집 대상이 없습니다.', updated: 0 };
+      }
+
+      let updated = 0;
+      const errors = [];
+
+      for (const row of rows) {
+        try {
+          // Prefer the original author named in a stored "RT @username:" prefix.
+          const rtMatch = row.content?.match(/^RT @(\w+):/);
+          const fetchUsername = rtMatch ? rtMatch[1] : (row.username || DEFAULT_USERNAME);
+
+          // Fetch the individual tweet page under that username.
+          const tweet = await fetchSingleTweet(NITTER_URL, fetchUsername, row.post_id);
+
+          // Strip a leading "RT @user:" and a trailing ellipsis from the fetched text.
+          const newContent = tweet.text.replace(/^RT @\w+:\s*/, '').replace(/…$/, '').trim();
+          const newTitle = extractTitle(newContent);
+          // Images are stored as a JSON array string, or NULL when the tweet has none.
+          const newImageUrls = tweet.imageUrls.length > 0 ? JSON.stringify(tweet.imageUrls) : null;
+
+          // The schedule title is derived from the cleaned text.
+          await db.query(
+            'UPDATE schedules SET title = ? WHERE id = ?',
+            [newTitle, row.schedule_id]
+          );
+
+          // The username used for the fetch replaces the stored one as well.
+          await db.query(
+            'UPDATE schedule_x SET username = ?, content = ?, image_urls = ? WHERE schedule_id = ?',
+            [fetchUsername, newContent, newImageUrls, row.schedule_id]
+          );
+
+          // Push the repaired schedule to the search index.
+          await syncScheduleById(meilisearch, db, row.schedule_id);
+
+          updated++;
+          fastify.log.info(`리트윗 재수집 완료: schedule_id=${row.schedule_id}, post_id=${row.post_id}`);
+        } catch (err) {
+          errors.push({ scheduleId: row.schedule_id, postId: row.post_id, error: err.message });
+          fastify.log.error(`리트윗 재수집 실패 (${row.schedule_id}): ${err.message}`);
+        } finally {
+          // Throttle after failures too, since a failed attempt may still have hit Nitter.
+          await new Promise((resolve) => setTimeout(resolve, 500));
+        }
+      }
+
+      // Record the batch outcome in the activity log.
+      logActivity(db, {
+        actor: 'admin',
+        action: 'update',
+        category: 'schedule',
+        targetType: 'x_schedule',
+        summary: `리트윗 재수집: ${updated}/${rows.length}건 완료`,
+      });
+
+      return { success: true, total: rows.length, updated, errors };
+    } catch (err) {
+      fastify.log.error(`리트윗 재수집 오류: ${err.message}`);
+      return serverError(reply, err.message);
+    }
+  });
}
diff --git a/backend/src/services/x/index.js b/backend/src/services/x/index.js
index 8a73fab..6d6e95d 100644
--- a/backend/src/services/x/index.js
+++ b/backend/src/services/x/index.js
@@ -65,6 +65,9 @@ async function xBotPlugin(fastify, opts) {
const time = formatTime(tweet.time);
const title = extractTitle(tweet.text);
+ // 리트윗인 경우 원본 작성자를 username으로 사용
+ const tweetUsername = tweet.originalUsername || username;
+
// 트랜잭션으로 INSERT 작업 수행
return withTransaction(fastify.db, async (connection) => {
// schedules 테이블에 저장
@@ -80,7 +83,7 @@ async function xBotPlugin(fastify, opts) {
[
scheduleId,
tweet.id,
- username,
+ tweetUsername,
tweet.text,
tweet.imageUrls.length > 0 ? JSON.stringify(tweet.imageUrls) : null,
]
diff --git a/backend/src/services/x/scraper.js b/backend/src/services/x/scraper.js
index e37c410..0f69439 100644
--- a/backend/src/services/x/scraper.js
+++ b/backend/src/services/x/scraper.js
@@ -112,6 +112,11 @@ function extractTextFromHtml(html) {
.replace(/
/g, '\n')
// 태그: href에서 원본 URL 추출 (외부 링크만)
.replace(/]*href="([^"]*)"[^>]*>([^<]*)<\/a>/g, (match, href, text) => {
+ // t.co 링크: Nitter가 프록시한 URL을 원본 t.co URL로 변환
+ const tcoMatch = href.match(/\/t\.co\/([^\s"?]+)/);
+ if (tcoMatch) {
+ return `https://t.co/${tcoMatch[1]}`;
+ }
// Nitter 내부 링크 (/search, /hashtag 등)는 표시 텍스트 사용
if (href.startsWith('/')) {
return text;
@@ -146,6 +151,22 @@ export function parseTweets(html, username, options = {}) {
const isRetweet = container.includes('class="retweet-header"');
if (isRetweet && !includeRetweets) continue;
+ // 리트윗인 경우 원본 작성자 추출 (data-username 또는 tweet-header에서)
+ let originalUsername = null;
+ if (isRetweet) {
+ const dataUserMatch = containers[i - 1]?.match(/data-username="([^"]+)"/) ||
+ container.match(/data-username="([^"]+)"/);
+ if (dataUserMatch) {
+ originalUsername = dataUserMatch[1];
+ } else {
+ // tweet-header의 username 링크에서 추출
+ const headerUserMatch = container.match(/class="username"[^>]*href="\/([^"]+)"/);
+ if (headerUserMatch) {
+ originalUsername = headerUserMatch[1];
+ }
+ }
+ }
+
// 트윗 ID
const idMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/);
if (!idMatch) continue;
@@ -171,7 +192,11 @@ export function parseTweets(html, username, options = {}) {
time,
text,
imageUrls,
- url: `https://x.com/${username}/status/${id}`,
+ isRetweet,
+ originalUsername,
+ url: isRetweet && originalUsername
+ ? `https://x.com/${originalUsername}/status/${id}`
+ : `https://x.com/${username}/status/${id}`,
});
}