fix: correct RSS parsing for guid/link/title fields
fast-xml-parser returns elements with attributes (like <guid isPermaLink>)
as { "@_isPermaLink": "true", "#text": "url" } — calling String() on that
gives "[object Object]", making every torrent_id identical and causing the
UNIQUE constraint to drop all but the first episode insert.
Fixes:
- Add textOf() helper that extracts #text from attribute-bearing nodes
- Apply textOf() to guid, link, title, category, size, pubDate fields
- Add isArray config so a single-result feed still returns an array
- Use <link> directly as torrent_url (Nyaa provides the .torrent URL there)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,13 @@ import { XMLParser } from 'fast-xml-parser'
|
|||||||
import type { NyaaItem } from '../types.js'
|
import type { NyaaItem } from '../types.js'
|
||||||
|
|
||||||
const NYAA_BASE = 'https://nyaa.si'
|
const NYAA_BASE = 'https://nyaa.si'
|
||||||
// Shared RSS parser instance.
const parser = new XMLParser({
  // Keep attributes so nodes like <guid isPermaLink="true"> are preserved;
  // such nodes come back as objects: { "@_isPermaLink": "true", "#text": "…" }.
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  // Keep tag text as raw strings. With value parsing enabled, numeric- or
  // boolean-looking text (e.g. a title of "007" or "1e3") is coerced and the
  // original spelling is lost. Numeric fields are converted explicitly where
  // they are read.
  parseTagValue: false,
  // Always treat <item> as an array, even when there is only one result
  isArray: (_name, jpath) => jpath === 'rss.channel.item',
})
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a Nyaa RSS URL from a search query and optional category.
|
* Build a Nyaa RSS URL from a search query and optional category.
|
||||||
@@ -45,28 +51,45 @@ export async function searchNyaa(query: string, category = '1_2'): Promise<NyaaI
|
|||||||
return fetchRss(url)
|
return fetchRss(url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extract the text value from a parsed XML field.
 *
 * fast-xml-parser returns elements that carry attributes as objects of the
 * form `{ "@_attr": "val", "#text": "content" }`, while plain elements come
 * through as primitives. Calling `String()` on the object form would yield
 * "[object Object]", so every field read from a parsed item should go
 * through this helper.
 *
 * @param val - A value taken from the parsed XML tree.
 * @returns The text content, or `''` when there is none (null/undefined,
 *   an attribute-only node, an empty array, or an unsupported value type).
 */
function textOf(val: unknown): string {
  if (val === null || val === undefined) return ''
  if (typeof val === 'string') return val
  // Primitive values the parser may produce when tag-value parsing is enabled.
  if (typeof val === 'number' || typeof val === 'boolean' || typeof val === 'bigint') {
    return String(val)
  }
  // A repeated tag is parsed as an array; use its first occurrence.
  if (Array.isArray(val)) return val.length > 0 ? textOf(val[0]) : ''
  if (typeof val === 'object') {
    const node = val as Record<string, unknown>
    // '#text' holds the element's text when the node also has attributes.
    // Recurse so a null or nested '#text' never stringifies to "null" or
    // "[object Object]".
    if ('#text' in node) return textOf(node['#text'])
  }
  return ''
}
|
||||||
|
|
||||||
function parseItem(item: Record<string, unknown>): NyaaItem {
|
function parseItem(item: Record<string, unknown>): NyaaItem {
|
||||||
const guid = String(item['guid'] ?? '')
|
// <guid isPermaLink="true">https://nyaa.si/view/1234567</guid>
|
||||||
// guid is like https://nyaa.si/view/1234567
|
// fast-xml-parser gives us { "@_isPermaLink": "true", "#text": "https://nyaa.si/view/1234567" }
|
||||||
const torrent_id = guid.split('/').pop() ?? guid
|
const guidStr = textOf(item['guid'])
|
||||||
|
const torrent_id = guidStr.split('/').pop() ?? guidStr
|
||||||
|
|
||||||
const link = String(item['link'] ?? '')
|
// In Nyaa RSS, <link> is the direct .torrent download URL:
|
||||||
// link in the RSS feed is the magnet or torrent link; torrent download is /download/<id>.torrent
|
// https://nyaa.si/download/1234567.torrent
|
||||||
const torrent_url = torrent_id
|
const linkStr = textOf(item['link'])
|
||||||
? `${NYAA_BASE}/download/${torrent_id}.torrent`
|
const torrent_url = linkStr || (torrent_id ? `${NYAA_BASE}/download/${torrent_id}.torrent` : '')
|
||||||
: link
|
|
||||||
|
|
||||||
// Nyaa RSS uses nyaa: namespace for extended fields
|
// Nyaa namespace fields (nyaa:seeders, nyaa:size, etc.)
|
||||||
const magnet = item['nyaa:magnetUri'] ?? item['nyaa:magnetLink'] ?? null
|
const magnet = item['nyaa:magnetUri'] ?? item['nyaa:magnetLink'] ?? null
|
||||||
const category = String(item['nyaa:category'] ?? item['category'] ?? '')
|
const category = textOf(item['nyaa:category'] ?? item['category'])
|
||||||
const size = String(item['nyaa:size'] ?? '')
|
const size = textOf(item['nyaa:size'])
|
||||||
const seeders = Number(item['nyaa:seeders'] ?? 0)
|
const seeders = Number(item['nyaa:seeders'] ?? 0)
|
||||||
const leechers = Number(item['nyaa:leechers'] ?? 0)
|
const leechers = Number(item['nyaa:leechers'] ?? 0)
|
||||||
const downloads = Number(item['nyaa:downloads'] ?? 0)
|
const downloads = Number(item['nyaa:downloads'] ?? 0)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
torrent_id,
|
torrent_id,
|
||||||
title: String(item['title'] ?? ''),
|
title: textOf(item['title']),
|
||||||
torrent_url,
|
torrent_url,
|
||||||
magnet_url: magnet ? String(magnet) : null,
|
magnet_url: magnet ? String(magnet) : null,
|
||||||
category,
|
category,
|
||||||
@@ -74,28 +97,28 @@ function parseItem(item: Record<string, unknown>): NyaaItem {
|
|||||||
seeders,
|
seeders,
|
||||||
leechers,
|
leechers,
|
||||||
downloads,
|
downloads,
|
||||||
published: String(item['pubDate'] ?? ''),
|
published: textOf(item['pubDate']),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Parse an episode number from a torrent title.
 *
 * Patterns are tried in order and the first match wins:
 *   1. " - 12", " - 12.5", " - 12v2" (followed by whitespace or end of title)
 *   2. "[12]", "[12v2]"
 *   3. "S01E12"
 *   4. "EP12", "E12", "E12v2"
 *
 * @param title - Raw torrent title.
 * @returns The matched episode code (for patterns 3 and 4 only the digits
 *   after the "E"), or 'unknown' when no pattern matches.
 */
export function parseEpisodeCode(title: string): string {
  const patterns: readonly RegExp[] = [
    // " - 12" / " - 12v2". The lookahead also accepts end-of-title, so a
    // name that ends with the episode number is still recognised.
    /\s-\s(\d{1,4}(?:\.\d)?(?:v\d)?)(?=\s|$)/,
    // "[12]" / "[12v2]". Only 1-4 digits are accepted, so bracketed hex
    // checksums such as [CC3FE38D] never match here.
    /\[(\d{1,4}(?:\.\d)?(?:v\d)?)\]/,
    // "S01E12". Must start a token so it cannot match inside another word.
    /(?<![A-Za-z0-9])[Ss]\d{1,2}[Ee](\d{1,4})/,
    // "EP12" / "E12". Token boundaries on both sides stop an "E<digits>" run
    // inside a checksum (the "E38" in [CC3FE38D]) from being read as an
    // episode number.
    /(?<![A-Za-z0-9])[Ee][Pp]?(\d{1,4})(?:v\d)?(?![A-Za-z0-9])/,
  ]

  for (const pattern of patterns) {
    const code = pattern.exec(title)?.[1]
    if (code !== undefined) return code
  }
  return 'unknown'
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user