fix: correct RSS parsing for guid/link/title fields

fast-xml-parser returns elements with attributes (like <guid isPermaLink>)
as { "@_isPermaLink": "true", "#text": "url" } — calling String() on that
gives "[object Object]", making every torrent_id identical and causing the
UNIQUE constraint to drop all but the first episode insert.

Fixes:
- Add textOf() helper that extracts #text from attribute-bearing nodes
- Apply textOf() to guid, link, title, category, size, pubDate fields
- Add isArray config so a single-result feed still returns an array
- Use <link> directly as torrent_url (Nyaa provides the .torrent URL there)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
jason
2026-03-17 15:02:49 -05:00
parent 2872ae8c01
commit 96e7c6e8e5

View File

@@ -2,7 +2,13 @@ import { XMLParser } from 'fast-xml-parser'
import type { NyaaItem } from '../types.js'
const NYAA_BASE = 'https://nyaa.si'
const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_' })
const parser = new XMLParser({
ignoreAttributes: false,
attributeNamePrefix: '@_',
// Always treat <item> as an array, even when there is only one result
isArray: (_name, jpath) => jpath === 'rss.channel.item',
})
/**
* Build a Nyaa RSS URL from a search query and optional category.
@@ -45,28 +51,45 @@ export async function searchNyaa(query: string, category = '1_2'): Promise<NyaaI
return fetchRss(url)
}
/**
 * Extract the text value from a parsed XML field.
 *
 * fast-xml-parser returns an element that carries attributes as an object such as
 * { "@_attr": "val", "#text": "content" }; a plain element comes through as a
 * primitive. With the parser's default tag-value parsing that primitive may be a
 * string, a number, or a boolean (the literal text "true"/"false").
 *
 * @param val - Raw value taken from the parsed item.
 * @returns The text content as a string, or '' when there is no usable text
 *          (null/undefined, an object without "#text", an array, or a "#text"
 *          that is itself not a primitive).
 */
function textOf(val: unknown): string {
  // Unwrap an attribute-bearing node once so that the same primitive handling
  // applies to both `<a>x</a>` and `<a attr="1">x</a>`.
  const inner =
    typeof val === 'object' && val !== null && '#text' in val
      ? (val as Record<string, unknown>)['#text']
      : val

  if (typeof inner === 'string') return inner
  // Numbers, booleans and bigints stringify losslessly; anything else (objects,
  // arrays, null, undefined) would become "[object Object]"/"null"/"undefined",
  // so it is reported as "no text" instead.
  if (typeof inner === 'number' || typeof inner === 'boolean' || typeof inner === 'bigint') {
    return String(inner)
  }
  return ''
}
function parseItem(item: Record<string, unknown>): NyaaItem {
const guid = String(item['guid'] ?? '')
// guid is like https://nyaa.si/view/1234567
const torrent_id = guid.split('/').pop() ?? guid
// <guid isPermaLink="true">https://nyaa.si/view/1234567</guid>
// fast-xml-parser gives us { "@_isPermaLink": "true", "#text": "https://nyaa.si/view/1234567" }
const guidStr = textOf(item['guid'])
const torrent_id = guidStr.split('/').pop() ?? guidStr
const link = String(item['link'] ?? '')
// link in the RSS feed is the magnet or torrent link; torrent download is /download/<id>.torrent
const torrent_url = torrent_id
? `${NYAA_BASE}/download/${torrent_id}.torrent`
: link
// In Nyaa RSS, <link> is the direct .torrent download URL:
// https://nyaa.si/download/1234567.torrent
const linkStr = textOf(item['link'])
const torrent_url = linkStr || (torrent_id ? `${NYAA_BASE}/download/${torrent_id}.torrent` : '')
// Nyaa RSS uses nyaa: namespace for extended fields
// Nyaa namespace fields (nyaa:seeders, nyaa:size, etc.)
const magnet = item['nyaa:magnetUri'] ?? item['nyaa:magnetLink'] ?? null
const category = String(item['nyaa:category'] ?? item['category'] ?? '')
const size = String(item['nyaa:size'] ?? '')
const category = textOf(item['nyaa:category'] ?? item['category'])
const size = textOf(item['nyaa:size'])
const seeders = Number(item['nyaa:seeders'] ?? 0)
const leechers = Number(item['nyaa:leechers'] ?? 0)
const downloads = Number(item['nyaa:downloads'] ?? 0)
return {
torrent_id,
title: String(item['title'] ?? ''),
title: textOf(item['title']),
torrent_url,
magnet_url: magnet ? String(magnet) : null,
category,
@@ -74,28 +97,28 @@ function parseItem(item: Record<string, unknown>): NyaaItem {
seeders,
leechers,
downloads,
published: String(item['pubDate'] ?? ''),
published: textOf(item['pubDate']),
}
}
/**
 * Parse an episode number from a torrent title.
 *
 * Handles common patterns, tried in this order (first match wins):
 *   " - 12 " / " - 12v2 ", "[12]" / "[12v2]", "S01E12", "EP12" / "E12".
 *
 * @param title - Torrent title to inspect.
 * @returns The matched episode string (e.g. "12", "12v2", "07") or 'unknown'
 *          when no pattern matches.
 */
export function parseEpisodeCode(title: string): string {
  const patterns: readonly RegExp[] = [
    // " - 12 " or " - 12v2 " (surrounded by whitespace)
    /\s-\s(\d{1,4}(?:\.\d)?(?:v\d)?)\s/,
    // "[12]" or "[12v2]"; the bracket content must be at most four digits, so
    // hash blocks such as "[CC3FE38D]" do not match
    /\[(\d{1,4}(?:\.\d)?(?:v\d)?)\]/,
    // "S01E12" must be tried before the bare "E12" form, which would otherwise
    // match any earlier "e<digit>" in the title (e.g. "Theme1")
    /[Ss]\d{1,2}[Ee](\d{1,4})/,
    // "EP12" or "E12"
    /[Ee][Pp]?(\d{1,4})/,
  ]

  for (const pattern of patterns) {
    const code = title.match(pattern)?.[1]
    if (code !== undefined) return code
  }
  return 'unknown'
}