-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel-metadata.schema.json
More file actions
133 lines (133 loc) · 4.25 KB
/
model-metadata.schema.json
File metadata and controls
133 lines (133 loc) · 4.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/InterwebAlchemy/model-metadata-central/model-metadata.schema.json",
"title": "Language Model Metadata",
"description": "A language model metadata definition for developer tools",
"type": "object",
"properties": {
"model_id": {
"description": "The identifier of the model (example: gpt-3.5-turbo)",
"type": "string"
},
"model_name": {
"description": "The human-friendly name of the model (example: GPT-3.5 Turbo)",
"type": "string"
},
"model_provider": {
"description": "The provider of the model in lowercase (example: openai)",
"type": "string"
},
"model_description": {
"description": "A human-friendly description of the model",
"type": "string"
},
"model_info": {
"description": "Link to a page with more information about the model",
"type": "string",
"format": "uri"
},
"model_version": {
"description": "The version of the model (example: 0613)",
"type": "string"
},
"model_type": {
"description": "The type of model (example: chat)",
"type": "string",
"enum": ["chat", "completion", "embedding"]
},
"context_window": {
"description": "The maximum number of tokens in the model's context window",
"type": "integer",
"minimum": 1
},
"max_tokens": {
"description": "The maximum number of tokens the model will use to generate a response",
"type": "integer",
"minimum": 1
},
"cost_per_token": {
"description": "The cost per token in USD, either a single number or an object with separate input and output costs",
"type": ["object", "number"],
"minimum": 0,
"properties": {
"input": {
"description": "The cost per token of input sent to the model",
"type": "number",
"minimum": 0
},
"output": {
"description": "The cost per token of output generated by the model",
"type": "number",
"minimum": 0
}
}
},
"knowledge_cutoff": {
"description": "The training data cutoff date for the model",
"type": "string",
"format": "date"
},
"tokenizer": {
"description": "Tokenizer family and specific encoding/version used by the model",
"$ref": "#/$defs/tokenizer_config"
},
"tuning": {
"description": "Tags for things the model was tuned for (example: function, instruction)",
"type": "array",
"items": {
"type": "string",
"enum": ["function", "instruction", "code", "multilingual", "multimodal"]
},
"uniqueItems": true,
"minItems": 1
},
"deprecated": {
"description": "Whether the model has been deprecated by the provider",
"type": "boolean",
"default": false
},
"providers": {
"description": "The providers that serve this model and their configuration",
"type": "array",
"items": {
"$ref": "#/$defs/provider_reference"
},
"uniqueItems": true,
"minItems": 1
}
},
"required": ["model_id", "model_type", "context_window"],
"$defs": {
"provider_reference": {
"type": "object",
"description": "A reference to a provider definition",
"properties": {
"provider_id": {
"description": "The identifier matching a provider in providers/",
"type": "string"
},
"model_id_on_provider": {
"description": "The model ID as used by this specific provider (may differ from model_id)",
"type": "string"
}
},
"required": ["provider_id"]
},
"tokenizer_config": {
"type": "object",
"description": "Tokenizer family and the specific encoding/version identifier within that family",
"properties": {
"family": {
"description": "Tokenizer library or system (example: tiktoken, tekken, sentencepiece)",
"type": "string",
"enum": ["tiktoken", "tekken", "sentencepiece", "huggingface", "other", "unknown"]
},
"name": {
"description": "The encoding or version identifier within the family (example: 'cl100k_base' for tiktoken, 'v3' for tekken)",
"type": "string"
}
},
"required": ["family"]
}
}
}