fix: correct RSS parsing for guid/link/title fields

fast-xml-parser returns elements with attributes (like <guid isPermaLink>)
as { "@_isPermaLink": "true", "#text": "url" } — calling String() on that
gives "[object Object]", making every torrent_id identical and causing the
UNIQUE constraint to drop all but the first episode insert.

Fixes:
- Add textOf() helper that extracts #text from attribute-bearing nodes
- Apply textOf() to guid, link, title, category, size, pubDate fields
- Add isArray config so a single-result feed still returns an array
- Use <link> directly as torrent_url (Nyaa provides the .torrent URL there)
- Check the S01E12 pattern before the looser EP12/E12 pattern in parseEpisodeCode()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
jason
2026-03-17 15:02:49 -05:00
parent 2872ae8c01
commit 96e7c6e8e5

View File

@@ -2,7 +2,13 @@ import { XMLParser } from 'fast-xml-parser'
import type { NyaaItem } from '../types.js' import type { NyaaItem } from '../types.js'
const NYAA_BASE = 'https://nyaa.si' const NYAA_BASE = 'https://nyaa.si'
const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_' })
const parser = new XMLParser({
  attributeNamePrefix: '@_',
  ignoreAttributes: false,
  // A feed with exactly one result would otherwise yield a bare object for
  // <item>; forcing the array shape keeps the parsed structure uniform.
  isArray(_tagName, path) {
    return path === 'rss.channel.item'
  },
})
/** /**
* Build a Nyaa RSS URL from a search query and optional category. * Build a Nyaa RSS URL from a search query and optional category.
@@ -45,28 +51,45 @@ export async function searchNyaa(query: string, category = '1_2'): Promise<NyaaI
return fetchRss(url) return fetchRss(url)
} }
/**
 * Extract the text content of a field parsed by fast-xml-parser.
 *
 * An element that carries attributes is returned by the parser as an object
 * such as { "@_attr": "val", "#text": "content" }; an element without
 * attributes comes through as a bare primitive. Calling String() on the object
 * form would produce "[object Object]", so the "#text" entry is unwrapped here.
 *
 * @param val - Raw value read from the parsed item.
 * @returns The text content, or '' when the value is missing or carries no text.
 */
function textOf(val: unknown): string {
  if (val === null || val === undefined) return ''
  if (typeof val === 'string') return val
  // The parser's value parsing can turn tag text like "1234" or "true" into a
  // number or boolean; convert those back to their textual form.
  if (typeof val === 'number' || typeof val === 'boolean') return String(val)
  if (typeof val === 'object') {
    const obj = val as Record<string, unknown>
    // Resolve '#text' through textOf again so a null or non-primitive entry
    // becomes '' instead of the literal "null" / "[object Object]".
    if ('#text' in obj) return textOf(obj['#text'])
  }
  return ''
}
function parseItem(item: Record<string, unknown>): NyaaItem { function parseItem(item: Record<string, unknown>): NyaaItem {
const guid = String(item['guid'] ?? '') // <guid isPermaLink="true">https://nyaa.si/view/1234567</guid>
// guid is like https://nyaa.si/view/1234567 // fast-xml-parser gives us { "@_isPermaLink": "true", "#text": "https://nyaa.si/view/1234567" }
const torrent_id = guid.split('/').pop() ?? guid const guidStr = textOf(item['guid'])
const torrent_id = guidStr.split('/').pop() ?? guidStr
const link = String(item['link'] ?? '') // In Nyaa RSS, <link> is the direct .torrent download URL:
// link in the RSS feed is the magnet or torrent link; torrent download is /download/<id>.torrent // https://nyaa.si/download/1234567.torrent
const torrent_url = torrent_id const linkStr = textOf(item['link'])
? `${NYAA_BASE}/download/${torrent_id}.torrent` const torrent_url = linkStr || (torrent_id ? `${NYAA_BASE}/download/${torrent_id}.torrent` : '')
: link
// Nyaa RSS uses nyaa: namespace for extended fields // Nyaa namespace fields (nyaa:seeders, nyaa:size, etc.)
const magnet = item['nyaa:magnetUri'] ?? item['nyaa:magnetLink'] ?? null const magnet = item['nyaa:magnetUri'] ?? item['nyaa:magnetLink'] ?? null
const category = String(item['nyaa:category'] ?? item['category'] ?? '') const category = textOf(item['nyaa:category'] ?? item['category'])
const size = String(item['nyaa:size'] ?? '') const size = textOf(item['nyaa:size'])
const seeders = Number(item['nyaa:seeders'] ?? 0) const seeders = Number(item['nyaa:seeders'] ?? 0)
const leechers = Number(item['nyaa:leechers'] ?? 0) const leechers = Number(item['nyaa:leechers'] ?? 0)
const downloads = Number(item['nyaa:downloads'] ?? 0) const downloads = Number(item['nyaa:downloads'] ?? 0)
return { return {
torrent_id, torrent_id,
title: String(item['title'] ?? ''), title: textOf(item['title']),
torrent_url, torrent_url,
magnet_url: magnet ? String(magnet) : null, magnet_url: magnet ? String(magnet) : null,
category, category,
@@ -74,28 +97,28 @@ function parseItem(item: Record<string, unknown>): NyaaItem {
seeders, seeders,
leechers, leechers,
downloads, downloads,
published: String(item['pubDate'] ?? ''), published: textOf(item['pubDate']),
} }
} }
/** /**
* Parse an episode number from a torrent title. * Parse an episode number from a torrent title.
* Handles common patterns: " - 12", "[12]", "E12", "EP12", " 12 " * Handles common patterns: " - 12", "[12]", "E12", "EP12", "S01E12"
* Returns the matched string or 'unknown'. * Returns the matched string or 'unknown'.
*/ */
export function parseEpisodeCode(title: string): string { export function parseEpisodeCode(title: string): string {
// Match patterns like " - 12 " or " - 12v2" // Match " - 12 " or " - 12v2"
let m = title.match(/\s-\s(\d{1,4}(?:\.\d)?(?:v\d)?)\s/) let m = title.match(/\s-\s(\d{1,4}(?:\.\d)?(?:v\d)?)\s/)
if (m) return m[1] if (m) return m[1]
// Match [12] or [12v2] // Match [12] or [12v2] (but skip hash-like 6+ char hex blocks e.g. [CC3FE38D])
m = title.match(/\[(\d{1,4}(?:\.\d)?(?:v\d)?)\]/) m = title.match(/\[(\d{1,4}(?:\.\d)?(?:v\d)?)\]/)
if (m) return m[1] if (m) return m[1]
// Match EP12 or E12
m = title.match(/[Ee][Pp]?(\d{1,4})/)
if (m) return m[1]
// Match S01E12 // Match S01E12
m = title.match(/[Ss]\d{1,2}[Ee](\d{1,4})/) m = title.match(/[Ss]\d{1,2}[Ee](\d{1,4})/)
if (m) return m[1] if (m) return m[1]
// Match EP12 or E12
m = title.match(/[Ee][Pp]?(\d{1,4})/)
if (m) return m[1]
return 'unknown' return 'unknown'
} }