Upload ModernBERT model

Browse files

Files changed (13) hide show

1_Pooling/config.json +10 -0
README.md +559 -0
added_tokens.json +7 -0
config.json +48 -0
config_sentence_transformers.json +14 -0
merges.txt +0 -0
model.safetensors +3 -0
modules.json +14 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +62 -0
vocab.json +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "word_embedding_dimension": 512,
+    "pooling_mode_cls_token": true,
+    "pooling_mode_mean_tokens": false,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,559 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:58800
+- loss:MultipleNegativesRankingLoss
+base_model: Shuu12121/CodeModernBERT-Finch
+widget:
+- source_sentence: 'Returns boolean indicating whether the requestUrl matches against
+    the paths configured.
+    @param requestedUrl - url requested by user
+    @param opts - unless configuration
+    @returns {boolean}'
+  sentences:
+  - "def xmoe2_v1_l4k_global_only():\n  \"\"\"\"\"\"\n  hparams = xmoe2_v1_l4k()\n\
+    \  hparams.decoder_layers = [\n      \"att\" if l == \"local_att\" else l for\
+    \ l in hparams.decoder_layers]\n  return hparams"
+  - "function matchesPath(requestedUrl, opts) {\n  var paths = !opts.path || Array.isArray(opts.path)\
+    \ ?\n    opts.path : [opts.path];\n\n  if (paths) {\n    return paths.some(function(p)\
+    \ {\n      return (typeof p === 'string' && p === requestedUrl.pathname) ||\n\
+    \        (p instanceof RegExp && !! p.exec(requestedUrl.pathname));\n    });\n\
+    \  }\n\n  return false;\n}"
+  - "public static function factory($accessToken, $currentTeam)\n    {\n        $client\
+    \ = Client::factory($accessToken);\n\n        return new self($client, $currentTeam);\n\
+    \    }"
+- source_sentence: '// New creates a new ImageGraphics including an image.RGBA of
+    dimension w x h
+    // with background bgcol. If font is nil it will use a builtin font.
+    // If fontsize is empty useful default are used.'
+  sentences:
+  - "func New(width, height int, bgcol color.RGBA, font *truetype.Font, fontsize map[chart.FontSize]float64)\
+    \ *ImageGraphics {\n\timg := image.NewRGBA(image.Rect(0, 0, width, height))\n\t\
+    gc := draw2dimg.NewGraphicContext(img)\n\tgc.SetLineJoin(draw2d.BevelJoin)\n\t\
+    gc.SetLineCap(draw2d.SquareCap)\n\tgc.SetStrokeColor(image.Black)\n\tgc.SetFillColor(bgcol)\n\
+    \tgc.Translate(0.5, 0.5)\n\tgc.Clear()\n\tif font == nil {\n\t\tfont = defaultFont\n\
+    \t}\n\tif len(fontsize) == 0 {\n\t\tfontsize = ConstructFontSizes(13)\n\t}\n\t\
+    return &ImageGraphics{Image: img, x0: 0, y0: 0, w: width, h: height,\n\t\tbg:\
+    \ bgcol, gc: gc, font: font, fs: fontsize}\n}"
+  - "public static void requestDataLogsForApp(final Context context, final UUID appUuid)\
+    \ {\n        final Intent requestIntent = new Intent(INTENT_DL_REQUEST_DATA);\n\
+    \        requestIntent.putExtra(APP_UUID, appUuid);\n        context.sendBroadcast(requestIntent);\n\
+    \    }"
+  - "final protected function setWriteMode($mode)\n    {\n        if (!in_array($mode,\
+    \ [static::WRITE_MODE_INSERT, static::WRITE_MODE_UPSERT, static::WRITE_MODE_UPDATE]))\
+    \ {\n            throw new \\InvalidArgumentException(sprintf('Passed write mode\
+    \ \"%s\" is invalid!', $mode));\n        }\n        $this->writeMode = $mode;\n\
+    \    }"
+- source_sentence: 'Builds the path for a closed arc, returning a PolygonOptions that
+    can be
+    further customised before use.
+    @param center
+    @param start
+    @param end
+    @param arcType Pass in either ArcType.CHORD or ArcType.ROUND
+    @return PolygonOptions with the paths element populated.'
+  sentences:
+  - "function getJavaScriptCallbackParameterListSimple(parameters) {\n    var result\
+    \ = []\n\n    parameters.forEach(function(parameter){\n        if (!parameter.out)\
+    \ return\n        result.push(\"/*\" + getIdlType(parameter.type) + \"*/ \"+ parameter.name)\n\
+    \    })\n\n    return result.join(\", \")\n}"
+  - "public Color getBackground() {\r\n\t\tpredraw();\r\n\t\tFloatBuffer buffer =\
+    \ BufferUtils.createFloatBuffer(16);\r\n\t\tGL.glGetFloat(SGL.GL_COLOR_CLEAR_VALUE,\
+    \ buffer);\r\n\t\tpostdraw();\r\n\r\n\t\treturn new Color(buffer);\r\n\t}"
+  - "public static final PolygonOptions buildClosedArc(LatLong center, LatLong start,\
+    \ LatLong end, ArcType arcType) {\n        MVCArray res = buildArcPoints(center,\
+    \ start, end);\n        if (ArcType.ROUND.equals(arcType)) {\n            res.push(center);\n\
+    \        }\n        return new PolygonOptions().paths(res);\n    }"
+- source_sentence: "Read data from a spread sheet. Return the data in a dict with\n\
+    \    column numbers as keys.\n\n    sheet: xlrd.sheet.Sheet instance\n       \
+    \ Ready for use.\n\n    startstops: list\n        Four StartStop objects defining\
+    \ the data to read. See\n        :func:`~channelpack.pullxl.prepread`.\n\n   \
+    \ usecols: str or seqence of ints or None\n        The columns to use, 0-based.\
+    \ 0 is the spread sheet column\n        \"A\". Can be given as a string also -\
+    \ 'C:E, H' for columns C, D,\n        E and H.\n\n    Values in the returned dict\
+    \ are numpy arrays. Types are set based on\n    the types in the spread sheet."
+  sentences:
+  - "public function handleScanNotify(callable $callback)\n    {\n        $notify\
+    \ = $this->getNotify();\n\n        if (!$notify->isValid()) {\n            throw\
+    \ new FaultException('Invalid request payloads.', 400);\n        }\n\n       \
+    \ $notify = $notify->getNotify();\n\n        try {\n            $prepayId = call_user_func_array($callback,\
+    \ [$notify->get('product_id'), $notify->get('openid'), $notify]);\n          \
+    \  $response = [\n                'return_code' => 'SUCCESS',\n              \
+    \  'appid'       => $this->merchant->app_id,\n                'mch_id'      =>\
+    \ $this->merchant->merchant_id,\n                'nonce_str'   => uniqid(),\n\
+    \                'prepay_id'   => strval($prepayId),\n                'result_code'\
+    \ => 'SUCCESS',\n            ];\n            $response['sign'] = generate_sign($response,\
+    \ $this->merchant->key);\n        } catch (\\Exception $e) {\n            $response\
+    \ = [\n                'return_code'  => 'SUCCESS',\n                'return_msg'\
+    \   => $e->getCode(),\n                'result_code'  => 'FAIL',\n           \
+    \     'err_code_des' => $e->getMessage(),\n            ];\n        }\n\n     \
+    \   return new Response(XML::build($response));\n    }"
+  - "def _sheet_asdict(sheet, startstops, usecols=None):\n    \"\"\"\n    \"\"\"\n\
+    \n    _, _, start, stop = startstops\n    usecols = _sanitize_usecols(usecols)\n\
+    \n    if usecols is not None:\n        iswithin = start.col <= min(usecols) and\
+    \ stop.col > max(usecols)\n        mess = 'Column in usecols outside defined data\
+    \ range, got '\n        assert iswithin, mess + str(usecols)\n    else:      \
+    \                 # usecols is None.\n        usecols = tuple(range(start.col,\
+    \ stop.col))\n\n    # cols = usecols or range(start.col, stop.col)\n    D = dict()\n\
+    \n    for c in usecols:\n        cells = sheet.col(c, start_rowx=start.row, end_rowx=stop.row)\n\
+    \        types = set([cell.ctype for cell in cells])\n\n        # Replace empty\
+    \ values with nan if appropriate:\n        if (not types - NANABLE) and xlrd.XL_CELL_NUMBER\
+    \ in types:\n            D[c] = np.array([np.nan if cell.value == '' else cell.value\n\
+    \                             for cell in cells])\n        elif xlrd.XL_CELL_DATE\
+    \ in types:\n            dm = sheet.book.datemode\n            vals = []\n   \
+    \         for cell in cells:\n                if cell.ctype == xlrd.XL_CELL_DATE:\n\
+    \                    dtuple = xlrd.xldate_as_tuple(cell.value, dm)\n         \
+    \           vals.append(datetime.datetime(*dtuple))\n                elif cell.ctype\
+    \ in NONABLES:\n                    vals.append(None)\n                else:\n\
+    \                    vals.append(cell.value)\n            D[c] = np.array(vals)\n\
+    \        else:\n            vals = [None if cell.ctype in NONABLES else cell.value\n\
+    \                    for cell in cells]\n            D[c] = np.array(vals)\n\n\
+    \    return D"
+  - "func (o Option) RequiresOption(name string) bool {\n\tfor _, o := range o.Requires\
+    \ {\n\t\tif o == name {\n\t\t\treturn true\n\t\t}\n\t}\n\n\treturn false\n}"
+- source_sentence: '// reBuild partially rebuilds a site given the filesystem events.
+    // It returns whetever the content source was changed.
+    // TODO(bep) clean up/rewrite this method.'
+  sentences:
+  - "func WebPageImageResolver(doc *goquery.Document) ([]candidate, int) {\n\timgs\
+    \ := doc.Find(\"img\")\n\n\tvar candidates []candidate\n\tsignificantSurface :=\
+    \ 320 * 200\n\tsignificantSurfaceCount := 0\n\tsrc := \"\"\n\timgs.Each(func(i\
+    \ int, tag *goquery.Selection) {\n\t\tvar surface int\n\t\tsrc = getImageSrc(tag)\n\
+    \t\tif src == \"\" {\n\t\t\treturn\n\t\t}\n\n\t\twidth, _ := tag.Attr(\"width\"\
+    )\n\t\theight, _ := tag.Attr(\"height\")\n\t\tif width != \"\" {\n\t\t\tw, _ :=\
+    \ strconv.Atoi(width)\n\t\t\tif height != \"\" {\n\t\t\t\th, _ := strconv.Atoi(height)\n\
+    \t\t\t\tsurface = w * h\n\t\t\t} else {\n\t\t\t\tsurface = w\n\t\t\t}\n\t\t} else\
+    \ {\n\t\t\tif height != \"\" {\n\t\t\t\tsurface, _ = strconv.Atoi(height)\n\t\t\
+    \t} else {\n\t\t\t\tsurface = 0\n\t\t\t}\n\t\t}\n\n\t\tif surface > significantSurface\
+    \ {\n\t\t\tsignificantSurfaceCount++\n\t\t}\n\n\t\ttagscore := score(tag)\n\t\t\
+    if tagscore >= 0 {\n\t\t\tc := candidate{\n\t\t\t\turl:     src,\n\t\t\t\tsurface:\
+    \ surface,\n\t\t\t\tscore:   score(tag),\n\t\t\t}\n\t\t\tcandidates = append(candidates,\
+    \ c)\n\t\t}\n\t})\n\n\tif len(candidates) == 0 {\n\t\treturn nil, 0\n\t}\n\n\t\
+    return candidates, significantSurfaceCount\n\n}"
+  - "@SuppressWarnings(\"rawtypes\")\n\tpublic void open(Map conf, TopologyContext\
+    \ context,\n\t\t\tSpoutOutputCollector collector) {\n\t\tif(this.jmsProvider ==\
+    \ null){\n\t\t\tthrow new IllegalStateException(\"JMS provider has not been set.\"\
+    );\n\t\t}\n\t\tif(this.tupleProducer == null){\n\t\t\tthrow new IllegalStateException(\"\
+    JMS Tuple Producer has not been set.\");\n\t\t}\n\t\tInteger topologyTimeout =\
+    \ (Integer)conf.get(\"topology.message.timeout.secs\");\n\t\t// TODO fine a way\
+    \ to get the default timeout from storm, so we're not hard-coding to 30 seconds\
+    \ (it could change)\n\t\ttopologyTimeout = topologyTimeout == null ? 30 : topologyTimeout;\n\
+    \t\tif( (topologyTimeout.intValue() * 1000 )> this.recoveryPeriod){\n\t\t    LOG.warn(\"\
+    *** WARNING *** : \" +\n\t\t    \t\t\"Recovery period (\"+ this.recoveryPeriod\
+    \ + \" ms.) is less then the configured \" +\n\t\t    \t\t\"'topology.message.timeout.secs'\
+    \ of \" + topologyTimeout + \n\t\t    \t\t\" secs. This could lead to a message\
+    \ replay flood!\");\n\t\t}\n\t\tthis.queue = new LinkedBlockingQueue<Message>();\n\
+    \t\tthis.toCommit = new TreeSet<JmsMessageID>();\n        this.pendingMessages\
+    \ = new HashMap<JmsMessageID, Message>();\n\t\tthis.collector = collector;\n\t\
+    \ttry {\n\t\t\tConnectionFactory cf = this.jmsProvider.connectionFactory();\n\t\
+    \t\tDestination dest = this.jmsProvider.destination();\n\t\t\tthis.connection\
+    \ = cf.createConnection();\n\t\t\tthis.session = connection.createSession(false,\n\
+    \t\t\t\t\tthis.jmsAcknowledgeMode);\n\t\t\tMessageConsumer consumer = session.createConsumer(dest);\n\
+    \t\t\tconsumer.setMessageListener(this);\n\t\t\tthis.connection.start();\n\t\t\
+    \tif (this.isDurableSubscription() && this.recoveryPeriod > 0){\n\t\t\t    this.recoveryTimer\
+    \ = new Timer();\n\t\t\t    this.recoveryTimer.scheduleAtFixedRate(new RecoveryTask(),\
+    \ 10, this.recoveryPeriod);\n\t\t\t}\n\t\t\t\n\t\t} catch (Exception e) {\n\t\t\
+    \tLOG.warn(\"Error creating JMS connection.\", e);\n\t\t}\n\n\t}"
+  - "func (s *Site) processPartial(events []fsnotify.Event) (whatChanged, error) {\n\
+    \n\tevents = s.filterFileEvents(events)\n\tevents = s.translateFileEvents(events)\n\
+    \n\ts.Log.DEBUG.Printf(\"Rebuild for events %q\", events)\n\n\th := s.h\n\n\t\
+    // First we need to determine what changed\n\n\tvar (\n\t\tsourceChanged     \
+    \  = []fsnotify.Event{}\n\t\tsourceReallyChanged = []fsnotify.Event{}\n\t\tcontentFilesChanged\
+    \ []string\n\t\ttmplChanged         = []fsnotify.Event{}\n\t\tdataChanged    \
+    \     = []fsnotify.Event{}\n\t\ti18nChanged         = []fsnotify.Event{}\n\t\t\
+    shortcodesChanged   = make(map[string]bool)\n\t\tsourceFilesChanged  = make(map[string]bool)\n\
+    \n\t\t// prevent spamming the log on changes\n\t\tlogger = helpers.NewDistinctFeedbackLogger()\n\
+    \t)\n\n\tcachePartitions := make([]string, len(events))\n\n\tfor i, ev := range\
+    \ events {\n\t\tcachePartitions[i] = resources.ResourceKeyPartition(ev.Name)\n\
+    \n\t\tif s.isContentDirEvent(ev) {\n\t\t\tlogger.Println(\"Source changed\", ev)\n\
+    \t\t\tsourceChanged = append(sourceChanged, ev)\n\t\t}\n\t\tif s.isLayoutDirEvent(ev)\
+    \ {\n\t\t\tlogger.Println(\"Template changed\", ev)\n\t\t\ttmplChanged = append(tmplChanged,\
+    \ ev)\n\n\t\t\tif strings.Contains(ev.Name, \"shortcodes\") {\n\t\t\t\tshortcode\
+    \ := filepath.Base(ev.Name)\n\t\t\t\tshortcode = strings.TrimSuffix(shortcode,\
+    \ filepath.Ext(shortcode))\n\t\t\t\tshortcodesChanged[shortcode] = true\n\t\t\t\
+    }\n\t\t}\n\t\tif s.isDataDirEvent(ev) {\n\t\t\tlogger.Println(\"Data changed\"\
+    , ev)\n\t\t\tdataChanged = append(dataChanged, ev)\n\t\t}\n\t\tif s.isI18nEvent(ev)\
+    \ {\n\t\t\tlogger.Println(\"i18n changed\", ev)\n\t\t\ti18nChanged = append(dataChanged,\
+    \ ev)\n\t\t}\n\t}\n\n\t// These in memory resource caches will be rebuilt on demand.\n\
+    \tfor _, s := range s.h.Sites {\n\t\ts.ResourceSpec.ResourceCache.DeletePartitions(cachePartitions...)\n\
+    \t}\n\n\tif len(tmplChanged) > 0 || len(i18nChanged) > 0 {\n\t\tsites := s.h.Sites\n\
+    \t\tfirst := sites[0]\n\n\t\ts.h.init.Reset()\n\n\t\t// TOD(bep) globals clean\n\
+    \t\tif err := first.Deps.LoadResources(); err != nil {\n\t\t\treturn whatChanged{},\
+    \ err\n\t\t}\n\n\t\tfor i := 1; i < len(sites); i++ {\n\t\t\tsite := sites[i]\n\
+    \t\t\tvar err error\n\t\t\tdepsCfg := deps.DepsCfg{\n\t\t\t\tLanguage:      site.language,\n\
+    \t\t\t\tMediaTypes:    site.mediaTypesConfig,\n\t\t\t\tOutputFormats: site.outputFormatsConfig,\n\
+    \t\t\t}\n\t\t\tsite.Deps, err = first.Deps.ForLanguage(depsCfg, func(d *deps.Deps)\
+    \ error {\n\t\t\t\td.Site = &site.Info\n\t\t\t\treturn nil\n\t\t\t})\n\t\t\tif\
+    \ err != nil {\n\t\t\t\treturn whatChanged{}, err\n\t\t\t}\n\t\t}\n\t}\n\n\tif\
+    \ len(dataChanged) > 0 {\n\t\ts.h.init.data.Reset()\n\t}\n\n\tfor _, ev := range\
+    \ sourceChanged {\n\t\tremoved := false\n\n\t\tif ev.Op&fsnotify.Remove == fsnotify.Remove\
+    \ {\n\t\t\tremoved = true\n\t\t}\n\n\t\t// Some editors (Vim) sometimes issue\
+    \ only a Rename operation when writing an existing file\n\t\t// Sometimes a rename\
+    \ operation means that file has been renamed other times it means\n\t\t// it's\
+    \ been updated\n\t\tif ev.Op&fsnotify.Rename == fsnotify.Rename {\n\t\t\t// If\
+    \ the file is still on disk, it's only been updated, if it's not, it's been moved\n\
+    \t\t\tif ex, err := afero.Exists(s.Fs.Source, ev.Name); !ex || err != nil {\n\t\
+    \t\t\tremoved = true\n\t\t\t}\n\t\t}\n\t\tif removed && IsContentFile(ev.Name)\
+    \ {\n\t\t\th.removePageByFilename(ev.Name)\n\t\t}\n\n\t\tsourceReallyChanged =\
+    \ append(sourceReallyChanged, ev)\n\t\tsourceFilesChanged[ev.Name] = true\n\t\
+    }\n\n\tfor shortcode := range shortcodesChanged {\n\t\t// There are certain scenarios\
+    \ that, when a shortcode changes,\n\t\t// it isn't sufficient to just rerender\
+    \ the already parsed shortcode.\n\t\t// One example is if the user adds a new\
+    \ shortcode to the content file first,\n\t\t// and then creates the shortcode\
+    \ on the file system.\n\t\t// To handle these scenarios, we must do a full reprocessing\
+    \ of the\n\t\t// pages that keeps a reference to the changed shortcode.\n\t\t\
+    pagesWithShortcode := h.findPagesByShortcode(shortcode)\n\t\tfor _, p := range\
+    \ pagesWithShortcode {\n\t\t\tcontentFilesChanged = append(contentFilesChanged,\
+    \ p.File().Filename())\n\t\t}\n\t}\n\n\tif len(sourceReallyChanged) > 0 || len(contentFilesChanged)\
+    \ > 0 {\n\t\tvar filenamesChanged []string\n\t\tfor _, e := range sourceReallyChanged\
+    \ {\n\t\t\tfilenamesChanged = append(filenamesChanged, e.Name)\n\t\t}\n\t\tif\
+    \ len(contentFilesChanged) > 0 {\n\t\t\tfilenamesChanged = append(filenamesChanged,\
+    \ contentFilesChanged...)\n\t\t}\n\n\t\tfilenamesChanged = helpers.UniqueStrings(filenamesChanged)\n\
+    \n\t\tif err := s.readAndProcessContent(filenamesChanged...); err != nil {\n\t\
+    \t\treturn whatChanged{}, err\n\t\t}\n\n\t}\n\n\tchanged := whatChanged{\n\t\t\
+    source: len(sourceChanged) > 0 || len(shortcodesChanged) > 0,\n\t\tother:  len(tmplChanged)\
+    \ > 0 || len(i18nChanged) > 0 || len(dataChanged) > 0,\n\t\tfiles:  sourceFilesChanged,\n\
+    \t}\n\n\treturn changed, nil\n\n}"
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+---
+# SentenceTransformer based on Shuu12121/CodeModernBERT-Finch
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Finch](https://huggingface.co/Shuu12121/CodeModernBERT-Finch). It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [Shuu12121/CodeModernBERT-Finch](https://huggingface.co/Shuu12121/CodeModernBERT-Finch) <!-- at revision cb1142a6a402471e02d11005b239f349c6d79be0 -->
+- **Maximum Sequence Length:** 1024 tokens
+- **Output Dimensionality:** 512 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
+  (1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub (replace the placeholder ID below with this model's actual repository ID)
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    '// reBuild partially rebuilds a site given the filesystem events.\n// It returns whetever the content source was changed.\n// TODO(bep) clean up/rewrite this method.',
+    'func (s *Site) processPartial(events []fsnotify.Event) (whatChanged, error) {\n\n\tevents = s.filterFileEvents(events)\n\tevents = s.translateFileEvents(events)\n\n\ts.Log.DEBUG.Printf("Rebuild for events %q", events)\n\n\th := s.h\n\n\t// First we need to determine what changed\n\n\tvar (\n\t\tsourceChanged       = []fsnotify.Event{}\n\t\tsourceReallyChanged = []fsnotify.Event{}\n\t\tcontentFilesChanged []string\n\t\ttmplChanged         = []fsnotify.Event{}\n\t\tdataChanged         = []fsnotify.Event{}\n\t\ti18nChanged         = []fsnotify.Event{}\n\t\tshortcodesChanged   = make(map[string]bool)\n\t\tsourceFilesChanged  = make(map[string]bool)\n\n\t\t// prevent spamming the log on changes\n\t\tlogger = helpers.NewDistinctFeedbackLogger()\n\t)\n\n\tcachePartitions := make([]string, len(events))\n\n\tfor i, ev := range events {\n\t\tcachePartitions[i] = resources.ResourceKeyPartition(ev.Name)\n\n\t\tif s.isContentDirEvent(ev) {\n\t\t\tlogger.Println("Source changed", ev)\n\t\t\tsourceChanged = append(sourceChanged, ev)\n\t\t}\n\t\tif s.isLayoutDirEvent(ev) {\n\t\t\tlogger.Println("Template changed", ev)\n\t\t\ttmplChanged = append(tmplChanged, ev)\n\n\t\t\tif strings.Contains(ev.Name, "shortcodes") {\n\t\t\t\tshortcode := filepath.Base(ev.Name)\n\t\t\t\tshortcode = strings.TrimSuffix(shortcode, filepath.Ext(shortcode))\n\t\t\t\tshortcodesChanged[shortcode] = true\n\t\t\t}\n\t\t}\n\t\tif s.isDataDirEvent(ev) {\n\t\t\tlogger.Println("Data changed", ev)\n\t\t\tdataChanged = append(dataChanged, ev)\n\t\t}\n\t\tif s.isI18nEvent(ev) {\n\t\t\tlogger.Println("i18n changed", ev)\n\t\t\ti18nChanged = append(dataChanged, ev)\n\t\t}\n\t}\n\n\t// These in memory resource caches will be rebuilt on demand.\n\tfor _, s := range s.h.Sites {\n\t\ts.ResourceSpec.ResourceCache.DeletePartitions(cachePartitions...)\n\t}\n\n\tif len(tmplChanged) > 0 || len(i18nChanged) > 0 {\n\t\tsites := s.h.Sites\n\t\tfirst := sites[0]\n\n\t\ts.h.init.Reset()\n\n\t\t// TOD(bep) globals 
clean\n\t\tif err := first.Deps.LoadResources(); err != nil {\n\t\t\treturn whatChanged{}, err\n\t\t}\n\n\t\tfor i := 1; i < len(sites); i++ {\n\t\t\tsite := sites[i]\n\t\t\tvar err error\n\t\t\tdepsCfg := deps.DepsCfg{\n\t\t\t\tLanguage:      site.language,\n\t\t\t\tMediaTypes:    site.mediaTypesConfig,\n\t\t\t\tOutputFormats: site.outputFormatsConfig,\n\t\t\t}\n\t\t\tsite.Deps, err = first.Deps.ForLanguage(depsCfg, func(d *deps.Deps) error {\n\t\t\t\td.Site = &site.Info\n\t\t\t\treturn nil\n\t\t\t})\n\t\t\tif err != nil {\n\t\t\t\treturn whatChanged{}, err\n\t\t\t}\n\t\t}\n\t}\n\n\tif len(dataChanged) > 0 {\n\t\ts.h.init.data.Reset()\n\t}\n\n\tfor _, ev := range sourceChanged {\n\t\tremoved := false\n\n\t\tif ev.Op&fsnotify.Remove == fsnotify.Remove {\n\t\t\tremoved = true\n\t\t}\n\n\t\t// Some editors (Vim) sometimes issue only a Rename operation when writing an existing file\n\t\t// Sometimes a rename operation means that file has been renamed other times it means\n\t\t// it\'s been updated\n\t\tif ev.Op&fsnotify.Rename == fsnotify.Rename {\n\t\t\t// If the file is still on disk, it\'s only been updated, if it\'s not, it\'s been moved\n\t\t\tif ex, err := afero.Exists(s.Fs.Source, ev.Name); !ex || err != nil {\n\t\t\t\tremoved = true\n\t\t\t}\n\t\t}\n\t\tif removed && IsContentFile(ev.Name) {\n\t\t\th.removePageByFilename(ev.Name)\n\t\t}\n\n\t\tsourceReallyChanged = append(sourceReallyChanged, ev)\n\t\tsourceFilesChanged[ev.Name] = true\n\t}\n\n\tfor shortcode := range shortcodesChanged {\n\t\t// There are certain scenarios that, when a shortcode changes,\n\t\t// it isn\'t sufficient to just rerender the already parsed shortcode.\n\t\t// One example is if the user adds a new shortcode to the content file first,\n\t\t// and then creates the shortcode on the file system.\n\t\t// To handle these scenarios, we must do a full reprocessing of the\n\t\t// pages that keeps a reference to the changed shortcode.\n\t\tpagesWithShortcode := 
h.findPagesByShortcode(shortcode)\n\t\tfor _, p := range pagesWithShortcode {\n\t\t\tcontentFilesChanged = append(contentFilesChanged, p.File().Filename())\n\t\t}\n\t}\n\n\tif len(sourceReallyChanged) > 0 || len(contentFilesChanged) > 0 {\n\t\tvar filenamesChanged []string\n\t\tfor _, e := range sourceReallyChanged {\n\t\t\tfilenamesChanged = append(filenamesChanged, e.Name)\n\t\t}\n\t\tif len(contentFilesChanged) > 0 {\n\t\t\tfilenamesChanged = append(filenamesChanged, contentFilesChanged...)\n\t\t}\n\n\t\tfilenamesChanged = helpers.UniqueStrings(filenamesChanged)\n\n\t\tif err := s.readAndProcessContent(filenamesChanged...); err != nil {\n\t\t\treturn whatChanged{}, err\n\t\t}\n\n\t}\n\n\tchanged := whatChanged{\n\t\tsource: len(sourceChanged) > 0 || len(shortcodesChanged) > 0,\n\t\tother:  len(tmplChanged) > 0 || len(i18nChanged) > 0 || len(dataChanged) > 0,\n\t\tfiles:  sourceFilesChanged,\n\t}\n\n\treturn changed, nil\n\n}',
+    'func WebPageImageResolver(doc *goquery.Document) ([]candidate, int) {\n\timgs := doc.Find("img")\n\n\tvar candidates []candidate\n\tsignificantSurface := 320 * 200\n\tsignificantSurfaceCount := 0\n\tsrc := ""\n\timgs.Each(func(i int, tag *goquery.Selection) {\n\t\tvar surface int\n\t\tsrc = getImageSrc(tag)\n\t\tif src == "" {\n\t\t\treturn\n\t\t}\n\n\t\twidth, _ := tag.Attr("width")\n\t\theight, _ := tag.Attr("height")\n\t\tif width != "" {\n\t\t\tw, _ := strconv.Atoi(width)\n\t\t\tif height != "" {\n\t\t\t\th, _ := strconv.Atoi(height)\n\t\t\t\tsurface = w * h\n\t\t\t} else {\n\t\t\t\tsurface = w\n\t\t\t}\n\t\t} else {\n\t\t\tif height != "" {\n\t\t\t\tsurface, _ = strconv.Atoi(height)\n\t\t\t} else {\n\t\t\t\tsurface = 0\n\t\t\t}\n\t\t}\n\n\t\tif surface > significantSurface {\n\t\t\tsignificantSurfaceCount++\n\t\t}\n\n\t\ttagscore := score(tag)\n\t\tif tagscore >= 0 {\n\t\t\tc := candidate{\n\t\t\t\turl:     src,\n\t\t\t\tsurface: surface,\n\t\t\t\tscore:   score(tag),\n\t\t\t}\n\t\t\tcandidates = append(candidates, c)\n\t\t}\n\t})\n\n\tif len(candidates) == 0 {\n\t\treturn nil, 0\n\t}\n\n\treturn candidates, significantSurfaceCount\n\n}',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 512]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[1.0000, 0.6671, 0.2242],
+#         [0.6671, 1.0000, 0.3125],
+#         [0.2242, 0.3125, 1.0000]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 58,800 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                          | sentence_1                                                                            | label                                                         |
+  |:--------|:------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
+  | type    | string                                                                              | string                                                                                | float                                                         |
+  | details | <ul><li>min: 3 tokens</li><li>mean: 55.73 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 179.65 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
+* Samples:
+  | sentence_0                                                                                                                                                                                                                                                                                           | sentence_1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | label            |
+  |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
+  | <code>// CASNext is a non-callback, loop-based version of CAS method.<br>//<br>//  Usage is like this:<br>//<br>// var state memcached.CASState<br>// for client.CASNext(vb, key, exp, &state) {<br>//     state.Value = some_mutation(state.Value)<br>// }<br>// if state.Err != nil { ... }</code> | <code>func (c *Client) CASNext(vb uint16, k string, exp int, state *CASState) bool {<br>	if state.initialized {<br>		if !state.Exists {<br>			// Adding a new key:<br>			if state.Value == nil {<br>				state.Cas = 0<br>				return false // no-op (delete of non-existent value)<br>			}<br>			state.resp, state.Err = c.Add(vb, k, 0, exp, state.Value)<br>		} else {<br>			// Updating / deleting a key:<br>			req := &gomemcached.MCRequest{<br>				Opcode:  gomemcached.DELETE,<br>				VBucket: vb,<br>				Key:     []byte(k),<br>				Cas:     state.Cas}<br>			if state.Value != nil {<br>				req.Opcode = gomemcached.SET<br>				req.Opaque = 0<br>				req.Extras = []byte{0, 0, 0, 0, 0, 0, 0, 0}<br>				req.Body = state.Value<br><br>				flags := 0<br>				exp := 0 // ??? Should we use initialexp here instead?<br>				binary.BigEndian.PutUint64(req.Extras, uint64(flags)<<32|uint64(exp))<br>			}<br>			state.resp, state.Err = c.Send(req)<br>		}<br><br>		// If the response status is KEY_EEXISTS or NOT_STORED there's a conflict and we'll need to<br>		// get the new value (below). Otherwise, we're done (either ...</code> | <code>1.0</code> |
+  | <code>// RestoreResourcePools restores a bulk of resource pools, usually from a backup.</code>                                                                                                                                                                                                       | <code>func (f *Facade) RestoreResourcePools(ctx datastore.Context, pools []pool.ResourcePool) error {<br>	defer ctx.Metrics().Stop(ctx.Metrics().Start("Facade.RestoreResourcePools"))<br>	// Do not DFSLock here, ControlPlaneDao does that<br>	var alog audit.Logger<br>	for _, pool := range pools {<br>		alog = f.auditLogger.Message(ctx, "Adding ResourcePool").Action(audit.Add).Entity(&pool)<br>		pool.DatabaseVersion = 0<br>		if err := f.addResourcePool(ctx, &pool); err != nil {<br>			if err == ErrPoolExists {<br>				if err := f.updateResourcePool(ctx, &pool); err != nil {<br>					glog.Errorf("Could not restore resource pool %s via update: %s", pool.ID, err)<br>					return alog.Error(err)<br>				}<br>			} else {<br>				glog.Errorf("Could not restore resource pool %s via add: %s", pool.ID, err)<br>				return alog.Error(err)<br>			}<br>		}<br>		alog.Succeeded()<br>	}<br>	return nil<br>}</code>                                                                                                                                                                                                                | <code>1.0</code> |
+  | <code>// run starts a goroutine to handle client connects and broadcast events.</code>                                                                                                                                                                                                               | <code>func (s *Streamer) run() {<br>	go func() {<br>		for {<br>			select {<br>			case cl := <-s.connecting:<br>				s.clients[cl] = true<br><br>			case cl := <-s.disconnecting:<br>				delete(s.clients, cl)<br><br>			case event := <-s.event:<br>				for cl := range s.clients {<br>					// TODO: non-blocking broadcast<br>					//select {<br>					//case cl <- event: // Try to send event to client<br>					//default:<br>					//	fmt.Println("Channel full. Discarding value")<br>					//}<br>					cl <- event<br>				}<br>			}<br>		}<br>	}()<br>}</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | <code>1.0</code> |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 200
+- `per_device_eval_batch_size`: 200
+- `fp16`: True
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: no
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 200
+- `per_device_eval_batch_size`: 200
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 3
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: True
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+
+</details>
+
+### Training Logs
+| Epoch  | Step | Training Loss |
+|:------:|:----:|:-------------:|
+| 1.7007 | 500  | 0.2697        |
+### Framework Versions
+- Python: 3.10.12
+- Sentence Transformers: 5.0.0
+- Transformers: 4.53.1
+- PyTorch: 2.7.0+cu128
+- Accelerate: 1.7.0
+- Datasets: 3.6.0
+- Tokenizers: 0.21.2
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

added_tokens.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "</s>": 30001,
+  "<mask>": 30004,
+  "<pad>": 30003,
+  "<s>": 30000,
+  "<unk>": 30002
+}

config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 30000,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "cls",
+  "cls_token_id": 30000,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 30001,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "hidden_activation": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 512,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "local_attention": 128,
+  "local_attention_rope_theta": 10000,
+  "local_attention_window": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
+  "pad_token_id": 30003,
+  "repad_logits_with_grad": false,
+  "rope_theta": 160000,
+  "sep_token_id": 30001,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.1",
+  "type_vocab_size": 2,
+  "vocab_size": 30005
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "model_type": "SentenceTransformer",
+  "__version__": {
+    "sentence_transformers": "5.0.0",
+    "transformers": "4.53.1",
+    "pytorch": "2.7.0+cu128"
+  },
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0d9d8597cf0feb08e28d116d717de8d0ea1c5173e8a196cbf5349647357d1b
+size 162143824

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 1024,
+    "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "30000": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30001": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30002": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30003": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30004": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "max_length": 256,
+  "model_max_length": 8192,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "stride": 0,
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff