| | import { HUB_URL } from "../consts"; |
| | import { createApiError } from "../error"; |
| | import type { ApiDatasetInfo } from "../types/api/api-dataset"; |
| | import type { CredentialsParams } from "../types/public"; |
| | import { checkCredentials } from "../utils/checkCredentials"; |
| | import { parseLinkHeader } from "../utils/parseLinkHeader"; |
| | import { pick } from "../utils/pick"; |
| |
|
| | export const DATASET_EXPAND_KEYS = [ |
| | "private", |
| | "downloads", |
| | "gated", |
| | "likes", |
| | "lastModified", |
| | ] as const satisfies readonly (keyof ApiDatasetInfo)[]; |
| |
|
| | export const DATASET_EXPANDABLE_KEYS = [ |
| | "author", |
| | "cardData", |
| | "citation", |
| | "createdAt", |
| | "disabled", |
| | "description", |
| | "downloads", |
| | "downloadsAllTime", |
| | "gated", |
| | "gitalyUid", |
| | "lastModified", |
| | "likes", |
| | "paperswithcode_id", |
| | "private", |
| | |
| | "sha", |
| | "tags", |
| | ] as const satisfies readonly (keyof ApiDatasetInfo)[]; |
| |
|
| | export interface DatasetEntry { |
| | id: string; |
| | name: string; |
| | private: boolean; |
| | downloads: number; |
| | gated: false | "auto" | "manual"; |
| | likes: number; |
| | updatedAt: Date; |
| | } |
| |
|
| | export async function* listDatasets< |
| | const T extends Exclude<(typeof DATASET_EXPANDABLE_KEYS)[number], (typeof DATASET_EXPAND_KEYS)[number]> = never, |
| | >( |
| | params?: { |
| | search?: { |
| | |
| | |
| | |
| | query?: string; |
| | owner?: string; |
| | tags?: string[]; |
| | }; |
| | hubUrl?: string; |
| | additionalFields?: T[]; |
| | |
| | |
| | |
| | limit?: number; |
| | |
| | |
| | |
| | fetch?: typeof fetch; |
| | } & Partial<CredentialsParams> |
| | ): AsyncGenerator<DatasetEntry & Pick<ApiDatasetInfo, T>> { |
| | const accessToken = params && checkCredentials(params); |
| | let totalToFetch = params?.limit ?? Infinity; |
| | const search = new URLSearchParams([ |
| | ...Object.entries({ |
| | limit: String(Math.min(totalToFetch, 500)), |
| | ...(params?.search?.owner ? { author: params.search.owner } : undefined), |
| | ...(params?.search?.query ? { search: params.search.query } : undefined), |
| | }), |
| | ...(params?.search?.tags?.map((tag) => ["filter", tag]) ?? []), |
| | ...DATASET_EXPAND_KEYS.map((val) => ["expand", val] satisfies [string, string]), |
| | ...(params?.additionalFields?.map((val) => ["expand", val] satisfies [string, string]) ?? []), |
| | ]).toString(); |
| | let url: string | undefined = `${params?.hubUrl || HUB_URL}/api/datasets` + (search ? "?" + search : ""); |
| |
|
| | while (url) { |
| | const res: Response = await (params?.fetch ?? fetch)(url, { |
| | headers: { |
| | accept: "application/json", |
| | ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined), |
| | }, |
| | }); |
| |
|
| | if (!res.ok) { |
| | throw await createApiError(res); |
| | } |
| |
|
| | const items: ApiDatasetInfo[] = await res.json(); |
| |
|
| | for (const item of items) { |
| | yield { |
| | ...(params?.additionalFields && pick(item, params.additionalFields)), |
| | id: item._id, |
| | name: item.id, |
| | private: item.private, |
| | downloads: item.downloads, |
| | likes: item.likes, |
| | gated: item.gated, |
| | updatedAt: new Date(item.lastModified), |
| | } as DatasetEntry & Pick<ApiDatasetInfo, T>; |
| | totalToFetch--; |
| | if (totalToFetch <= 0) { |
| | return; |
| | } |
| | } |
| |
|
| | const linkHeader = res.headers.get("Link"); |
| |
|
| | url = linkHeader ? parseLinkHeader(linkHeader).next : undefined; |
| | |
| | } |
| | } |
| |
|