import { parseNitterDateTime } from '../../utils/date.js';
/**
 * Extract the first non-empty paragraph of a tweet's text, for use as a title.
 *
 * Paragraphs are separated by one or more blank lines (LF or CRLF endings).
 * Empty or whitespace-only leading paragraphs are skipped, so text that
 * begins with blank lines still yields a title.
 *
 * @param {string} text - Raw tweet text.
 * @returns {string} The trimmed first non-empty paragraph, or '' if there is none.
 */
export function extractTitle(text) {
  if (!text) return '';
  const firstParagraph = text
    .split(/(?:\r?\n){2,}/)
    .map((paragraph) => paragraph.trim())
    .find((paragraph) => paragraph !== '');
  return firstParagraph ?? '';
}
/**
 * Extract tweet image URLs from HTML.
 *
 * Looks for `href="/pic/media%2F..."` (optionally `/pic/orig/media%2F...`)
 * attributes, URL-decodes the media path, strips any query string, and
 * rebuilds the URL on `https://pbs.twimg.com/media/`.
 *
 * @param {string} html - HTML to scan.
 * @returns {string[]} De-duplicated image URLs in order of first appearance.
 */
export function extractImageUrls(html) {
  const urls = new Set();
  const mediaLinkPattern = /href="\/pic\/(?:orig\/)?media%2F([^"]+)"/g;

  for (const [, encodedPath] of String(html).matchAll(mediaLinkPattern)) {
    let mediaPath;
    try {
      mediaPath = decodeURIComponent(encodedPath);
    } catch (err) {
      if (!(err instanceof URIError)) throw err;
      // A single link with malformed percent-encoding must not abort the
      // extraction of every other image on the page; skip just this one.
      continue;
    }
    // Drop the query string. The '%3F' split covers a separator that is still
    // percent-encoded after one decode pass (i.e. double-encoded input).
    const [cleanPath] = mediaPath.split('%3F')[0].split('?');
    urls.add(`https://pbs.twimg.com/media/${cleanPath}`);
  }

  return [...urls];
}
/**
 * Extract YouTube video IDs from text.
 *
 * Recognised forms (the ID is always 11 characters of [a-zA-Z0-9_-]):
 *   - youtu.be/{id}
 *   - youtube.com/watch?v={id}, including when `v` is not the first query
 *     parameter (e.g. `watch?feature=share&v={id}`, with `&` or `&amp;`)
 *   - youtube.com/shorts/{id}
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Unique IDs, grouped by URL form in the order listed
 *   above; within a form, in order of appearance.
 */
export function extractYoutubeVideoIds(text) {
  if (!text) return [];

  const patterns = [
    /youtu\.be\/([a-zA-Z0-9_-]{11})/g,
    // The optional group skips earlier query parameters; it cannot cross
    // whitespace, quotes, angle brackets or '#', so it stays within one URL.
    /youtube\.com\/watch\?(?:[^\s#"'<>]*?&(?:amp;)?)?v=([a-zA-Z0-9_-]{11})/g,
    /youtube\.com\/shorts\/([a-zA-Z0-9_-]{11})/g,
  ];

  const ids = new Set();
  const haystack = String(text);
  for (const pattern of patterns) {
    for (const [, videoId] of haystack.matchAll(pattern)) {
      ids.add(videoId);
    }
  }
  return [...ids];
}
/**
 * Extract profile information from HTML.
 *
 * - displayName: the trimmed `title` attribute of the element with
 *   class "profile-card-fullname".
 * - avatarUrl: the `src` of the first <img> following the opening tag of the
 *   element with class "profile-card-avatar". When that src contains `/pic/`,
 *   the URL-decoded remainder after `/pic/` is used instead of the raw src.
 *
 * @param {string} html - Profile page HTML.
 * @returns {{displayName: (string|null), avatarUrl: (string|null)}} Fields
 *   stay null when the corresponding markup is not found.
 */
export function extractProfile(html) {
  const profile = { displayName: null, avatarUrl: null };

  const nameMatch = html.match(/class="profile-card-fullname"[^>]*title="([^"]+)"/);
  if (nameMatch) {
    profile.displayName = nameMatch[1].trim();
  }

  // The regex literal must stay on one line: skip to the end of the avatar
  // element's opening tag, then lazily scan to the first <img ... src="...">.
  const avatarMatch = html.match(
    /class="profile-card-avatar"[^>]*>[\s\S]*?<img[^>]*src="([^"]+)"/,
  );
  if (avatarMatch) {
    let url = avatarMatch[1];
    // Proxied images carry the upstream location percent-encoded after /pic/.
    // NOTE(review): the decoded value is used as-is; whether it includes a
    // scheme depends on the page markup — confirm against callers.
    const encodedMatch = url.match(/\/pic\/(.+)/);
    if (encodedMatch) {
      url = decodeURIComponent(encodedMatch[1]);
    }
    profile.avatarUrl = url;
  }

  return profile;
}
/**
* HTML에서 트윗 목록 파싱
*/
export function parseTweets(html, username) {
const tweets = [];
const containers = html.split('class="timeline-item ');
for (let i = 1; i < containers.length; i++) {
const container = containers[i];
// 고정/리트윗 제외
const isPinned = container.includes('class="pinned"');
const isRetweet = container.includes('class="retweet-header"');
if (isPinned || isRetweet) continue;
// 트윗 ID
const idMatch = container.match(/href="\/[^\/]+\/status\/(\d+)/);
if (!idMatch) continue;
const id = idMatch[1];
// 시간
const timeMatch = container.match(/